24 #if !defined( BOLT_CL_CONTROL_H )
25 #define BOLT_CL_CONTROL_H
32 #include <boost/thread/mutex.hpp>
33 #include <boost/thread/locks.hpp>
34 #include <boost/shared_ptr.hpp>
// Two-valued setting; it is the type of the m_useHost member and is what
// setUseHost()/getUseHost() accept and return.
// NOTE(review): presumably selects whether host execution may be used in
// addition to the OpenCL device - confirm against the code that reads it.
104 enum e_UseHostMode {NoUseHost, UseHost};
105 enum e_RunMode {Automatic,
110 enum e_AutoTuneMode{NoAutoTune=0x0,
112 AutoTuneWorkShape=0x2,
// Debug option constants. None is zero and every other constant occupies a
// distinct single bit (0x1, 0x2, 0x4, 0x8, 0x10).
// A constructor initialiser elsewhere in this file sets m_debug to debug::None.
// NOTE(review): the meaning of each flag is only implied by its name, and the
// distinct-bit layout suggests they are meant to be OR-ed together - confirm
// against the code that tests these bits.
115 static const unsigned None=0;
116 static const unsigned Compile = 0x1;
117 static const unsigned ShowCode = 0x2;
118 static const unsigned SaveCompilerTemps = 0x4;
119 static const unsigned DebugKernelRun = 0x8;
120 static const unsigned AutoTune = 0x10;
123 enum e_WaitMode {BalancedWait,
133 const ::cl::CommandQueue& commandQueue =
getDefault().getCommandQueue(),
134 e_UseHostMode useHost=
getDefault().getUseHost(),
137 m_commandQueue(commandQueue),
139 m_forceRunMode(OpenCL),
140 m_defaultRunMode(OpenCL),
143 m_wgPerComputeUnit(
getDefault().m_wgPerComputeUnit),
144 m_compileOptions(
getDefault().m_compileOptions),
145 m_compileForAllDevices(
getDefault().m_compileForAllDevices),
151 control(
const control& ref) :
152 m_commandQueue(ref.m_commandQueue),
153 m_useHost(ref.m_useHost),
154 m_forceRunMode(ref.m_forceRunMode),
155 m_defaultRunMode(ref.m_defaultRunMode),
156 m_debug(ref.m_debug),
157 m_autoTune(ref.m_autoTune),
158 m_wgPerComputeUnit(ref.m_wgPerComputeUnit),
159 m_compileOptions(ref.m_compileOptions),
160 m_compileForAllDevices(ref.m_compileForAllDevices),
161 m_waitMode(ref.m_waitMode),
162 m_unroll(ref.m_unroll)
// Replaces the command queue held by this control.
// The argument is taken by value and copy-assigned into m_commandQueue.
172 void setCommandQueue(::cl::CommandQueue commandQueue) { m_commandQueue = commandQueue; };
// Stores the given e_UseHostMode value in m_useHost.
177 void setUseHost(e_UseHostMode useHost) { m_useHost = useHost; };
// Non-const overload: returns a mutable reference to the stored command queue,
// so the caller can modify m_commandQueue through it.
217 ::cl::CommandQueue& getCommandQueue( ) {
return m_commandQueue; };
// Const overload: returns a read-only reference to the stored command queue.
218 const ::cl::CommandQueue& getCommandQueue( )
const {
return m_commandQueue; };
// Returns, by value, the result of querying CL_QUEUE_CONTEXT on m_commandQueue.
// NOTE(review): a constructor in this file compares m_commandQueue() against
// NULL, so an empty queue appears to be possible; this accessor does not check
// for it - confirm what getInfo does in that case.
219 ::cl::Context getContext()
const {
return m_commandQueue.getInfo<CL_QUEUE_CONTEXT>();};
// Returns, by value, the result of querying CL_QUEUE_DEVICE on m_commandQueue.
// NOTE(review): a constructor in this file compares m_commandQueue() against
// NULL, so an empty queue appears to be possible; this accessor does not check
// for it - confirm what getInfo does in that case.
220 ::cl::Device getDevice()
const {
return m_commandQueue.getInfo<CL_QUEUE_DEVICE>();};
// Returns the e_UseHostMode value currently stored in m_useHost.
221 e_UseHostMode getUseHost()
const {
return m_useHost; };
// Returns the e_RunMode value currently stored in m_forceRunMode.
222 e_RunMode getForceRunMode()
const {
return m_forceRunMode; };
// Returns the e_RunMode value stored in m_defaultRunMode.
// Note that the accessor name ("DefaultPathToRun") differs from the member name.
223 e_RunMode getDefaultPathToRun()
const {
return m_defaultRunMode; };
// Returns the value of m_debug as an unsigned integer.
// NOTE(review): presumably a combination of the debug::* flag constants - confirm.
224 unsigned getDebugMode()
const {
return m_debug;};
225 int const getWGPerComputeUnit()
const {
return m_wgPerComputeUnit; };
226 const ::std::string getCompileOptions()
const {
return m_compileOptions; };
// Returns the e_WaitMode value currently stored in m_waitMode.
227 e_WaitMode getWaitMode()
const {
return m_waitMode; };
// Returns the integer stored in m_unroll.
// NOTE(review): presumably a loop-unroll setting - confirm against its users.
228 int getUnroll()
const {
return m_unroll; };
// Returns the boolean stored in m_compileForAllDevices.
229 bool getCompileForAllDevices()
const {
return m_compileForAllDevices; };
249 static control _defaultControl(
true );
250 return _defaultControl;
// Static member declaration; the definition is not part of this view.
// Both parameters are optional: printDevices defaults to true and deviceType
// defaults to CL_DEVICE_TYPE_ALL.
// NOTE(review): presumably prints the available OpenCL platforms, and their
// devices of the given type when printDevices is true - confirm in the definition.
253 static void printPlatforms(
bool printDevices =
true, cl_device_type deviceType = CL_DEVICE_TYPE_ALL );
// Static member declaration; the definition is not part of this view.
// Takes a [begin, end) iterator pair over a std::vector of ::cl::Platform,
// followed by the same two optional parameters as printPlatforms
// (printDevices = true, deviceType = CL_DEVICE_TYPE_ALL).
// NOTE(review): presumably prints only the platforms in the given range - confirm.
254 static void printPlatformsRange( std::vector< ::cl::Platform >::iterator begin, std::vector< ::cl::Platform >::iterator end,
255 bool printDevices =
true, cl_device_type deviceType = CL_DEVICE_TYPE_ALL );
282 m_debug(
debug::None),
283 m_autoTune(AutoTuneAll),
284 m_wgPerComputeUnit(8),
285 m_compileForAllDevices(true),
286 m_waitMode(BusyWait),
289 ::cl_device_type dType = CL_DEVICE_TYPE_CPU;
290 if(m_commandQueue() != NULL)
292 ::cl::Device device = m_commandQueue.getInfo<CL_QUEUE_DEVICE>();
293 dType = device.getInfo<CL_DEVICE_TYPE>();
295 if(dType == CL_DEVICE_TYPE_CPU || m_commandQueue() == NULL)
300 m_forceRunMode = MultiCoreCpu;
301 m_defaultRunMode = MultiCoreCpu;
303 m_forceRunMode = SerialCpu;
304 m_defaultRunMode = SerialCpu;
310 m_forceRunMode = OpenCL;
311 m_defaultRunMode = OpenCL;
// Per-control settings. Each one is copied member-by-member by the copy
// constructor and exposed through the matching get*/set* accessor.

// Queue returned by getCommandQueue(); getContext()/getDevice() query it.
315 ::cl::CommandQueue m_commandQueue;
// Value set by setUseHost() and returned by getUseHost().
316 e_UseHostMode m_useHost;
// Run modes; the visible constructor code always assigns both to the same value.
317 e_RunMode m_forceRunMode;
318 e_RunMode m_defaultRunMode;
// One constructor initialises this to AutoTuneAll.
319 e_AutoTuneMode m_autoTune;
// One constructor initialises this to 8.
321 int m_wgPerComputeUnit;
// String returned by getCompileOptions().
322 ::std::string m_compileOptions;
// One constructor initialises this to true.
323 bool m_compileForAllDevices;
// One constructor initialises this to BusyWait.
324 e_WaitMode m_waitMode;
// NOTE(review): the enclosing struct header is not visible here; these look
// like the members of descBufferKey, since descBufferComp reads fields with
// exactly these names from its descBufferKey arguments - confirm.
// The visible comparator code looks at memFlags first, then buffContext( ),
// then host_ptr.
329 ::cl::Context buffContext;
330 cl_mem_flags memFlags;
331 const void* host_ptr;
334 struct descBufferValue
338 ::cl::Buffer buffBuff;
341 struct descBufferComp
343 bool operator( )(
const descBufferKey& lhs,
const descBufferKey& rhs )
const
345 if( lhs.memFlags < rhs.memFlags )
349 else if( lhs.memFlags == rhs.memFlags )
351 if( lhs.buffContext( ) < rhs.buffContext( ) )
355 else if( lhs.buffContext( ) == rhs.buffContext( ) )
357 if( lhs.host_ptr < rhs.host_ptr )
// Container type for the buffer map: descBufferKey -> descBufferValue, ordered
// by descBufferComp. Because it is a multimap, several entries may share
// equivalent keys.
378 typedef std::multimap< descBufferKey, descBufferValue, descBufferComp > mapBufferType;
390 mapBufferType::iterator m_iter;
395 UnlockBuffer( control& p_control, mapBufferType::iterator it ): m_iter( it ), m_control( p_control )
398 void operator( )(
const void* pBuff )
402 boost::lock_guard< boost::mutex > lock( m_control.mapGuard );
403 m_iter->second.inUse =
false;
// UnlockBuffer is a friend: its operator() locks mapGuard through its control
// reference before writing to a map entry.
407 friend class UnlockBuffer;
// The buffer map instance (see the mapBufferType typedef).
408 mapBufferType mapBuffer;
// Mutex that UnlockBuffer::operator() holds (via boost::lock_guard) while it
// sets an entry's inUse flag to false.
// NOTE(review): presumably every other access to mapBuffer takes this lock as
// well - confirm in the code that inserts into and searches the map.
409 boost::mutex mapGuard;