Bolt  1.3
C++ template library with support for OpenCL
control.h
Go to the documentation of this file.
1 /***************************************************************************
2 * © 2012,2014 Advanced Micro Devices, Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 
16 ***************************************************************************/
17 
18 
22 #if !defined( BOLT_AMP_CONTROL_H )
23 #define BOLT_AMP_CONTROL_H
24 
25 #pragma once
26 
27 #include <amp.h>
28 #include <string>
29 #include <map>
30 
31 namespace bolt
32 {
33 namespace amp
34 {
35 
80 class control {
81 public:
82  enum e_UseHostMode {
83  NoUseHost,
84  UseHost};
85  enum e_RunMode {
86  Automatic,
87  SerialCpu,
88  MultiCoreCpu,
89  Gpu };
90 
91  enum e_AutoTuneMode{NoAutoTune=0x0,
92  AutoTuneDevice=0x1,
93  AutoTuneWorkShape=0x2,
94  AutoTuneAll=0x3}; // FIXME, experimental
95  struct debug {
96  static const unsigned None=0;
97  static const unsigned Compile = 0x1;
98  static const unsigned ShowCode = 0x2;
99  static const unsigned SaveCompilerTemps = 0x4;
100  static const unsigned DebugKernelRun = 0x8;
101  static const unsigned AutoTune = 0x10;
102  };
103 
104  enum e_WaitMode {
105  BalancedWait, // Balance of Busy and Nice: tries to use Busy for short-running kernels. \todo: Balanced currently maps to nice.
106  NiceWait, // Use an OS semaphore to detect completion status.
107  BusyWait, // Busy a CPU core continuously monitoring results. Lowest-latency, but requires a dedicated core.
108  ClFinish, // Call clFinish on the queue.
109  };
110 
111 public:
112 
113  // Construct a new control structure, copying from default control for arguments that are not overridden.
114  control(
115  Concurrency::accelerator accel=getDefault().getAccelerator(),
116  e_UseHostMode useHost=getDefault().getUseHost(),
117  unsigned debug=getDefault().getDebug()
118  ):
119  m_accelerator(accel),
120  m_useHost(useHost),
121  m_forceRunMode(getDefault().m_forceRunMode),
122  m_defaultRunMode(getDefault().m_defaultRunMode),
123  m_debug(debug),
124  m_autoTune(getDefault().m_autoTune),
125  m_wgPerComputeUnit(getDefault().m_wgPerComputeUnit),
126  m_waitMode(getDefault().m_waitMode),
127  m_unroll(getDefault().m_unroll)
128  {};
129 
130  control( const control& ref) :
131  m_accelerator(ref.m_accelerator),
132  m_useHost(ref.m_useHost),
133  m_forceRunMode(ref.m_forceRunMode),
134  m_defaultRunMode(ref.m_defaultRunMode),
135  m_debug(ref.m_debug),
136  m_autoTune(ref.m_autoTune),
137  m_wgPerComputeUnit(ref.m_wgPerComputeUnit),
138  m_waitMode(ref.m_waitMode),
139  m_unroll(ref.m_unroll)
140  {
141  //printf("control::copy construcor\n");
142  };
143 
144  //setters:
146  void setAccelerator(::Concurrency::accelerator accel) { m_accelerator = accel; };
147 
151  void setUseHost(e_UseHostMode useHost) { m_useHost = useHost; };
152 
157  void setForceRunMode(e_RunMode forceRunMode) { m_forceRunMode = forceRunMode; };
158 
169  void setDebug(unsigned debug) { m_debug = debug; };
170 
175  void setWGPerComputeUnit(int wgPerComputeUnit) { m_wgPerComputeUnit = wgPerComputeUnit; };
176 
178  void setWaitMode(e_WaitMode waitMode) { m_waitMode = waitMode; };
179 
181  void setUnroll(int unroll) { m_unroll = unroll; };
182 
183  // getters:
184  Concurrency::accelerator& getAccelerator( ) { return m_accelerator; };
185  const Concurrency::accelerator& getAccelerator( ) const { return m_accelerator; };
186 
187  e_UseHostMode getUseHost() const { return m_useHost; };
188  e_RunMode getForceRunMode() const { return m_forceRunMode; };
189  e_RunMode getDefaultPathToRun() const { return m_defaultRunMode; };
190  unsigned getDebug() const { return m_debug;};
191  int const getWGPerComputeUnit() const { return m_wgPerComputeUnit; };
192  e_WaitMode getWaitMode() const { return m_waitMode; };
193  int getUnroll() const { return m_unroll; };
194 
210  static control &getDefault()
211  {
212  // Default control structure; this can be accessed by the bolt::cl::control::getDefault()
213  static control _defaultControl( true );
214  return _defaultControl;
215  };
216 
217  //TODO - implement the below function in control.cpp
218  /*static void printPlatforms( bool printDevices = true, cl_device_type deviceType = CL_DEVICE_TYPE_ALL );
219  static void printPlatformsRange( std::vector< ::cl::Platform >::iterator begin, std::vector< ::cl::Platform >::iterator end,
220  bool printDevices = true, cl_device_type deviceType = CL_DEVICE_TYPE_ALL );*/
221 
222 private:
223 
224  // This is the private constructor is only used to create the initial default control structure.
225  control(bool createGlobal) :
226  m_accelerator( Concurrency::accelerator::default_accelerator ),
227  m_useHost(UseHost),
228  m_forceRunMode(Automatic),
229  m_debug(debug::None),
230  m_autoTune(AutoTuneAll),
231  m_wgPerComputeUnit(8),
232  m_waitMode(BusyWait),
233  m_unroll(1)
234  {
235 
236  if(m_accelerator.default_accelerator == NULL)
237  {
238 #ifdef ENABLE_TBB
239  m_forceRunMode = MultiCoreCpu;
240  m_defaultRunMode = MultiCoreCpu;
241 #else
242  m_forceRunMode = SerialCpu;
243  m_defaultRunMode = SerialCpu;
244 #endif
245  }
246  else
247  {
248  m_forceRunMode = Gpu;
249  m_defaultRunMode = Gpu;
250  }
251 
252 
253  };
254 
255  //::cl::CommandQueue m_commandQueue;
256  ::Concurrency::accelerator m_accelerator;
257  e_UseHostMode m_useHost;
258  e_RunMode m_forceRunMode;
259  e_RunMode m_defaultRunMode;
260  e_AutoTuneMode m_autoTune; /* auto-tune the choice of device CPU/GPU and workgroup shape */
261  unsigned m_debug;
262  int m_wgPerComputeUnit;
263  e_WaitMode m_waitMode;
264  int m_unroll;
265 };
266 };
267 };
268 
269 // Implementor note:
270 // When adding a new field to this structure, don't forget to:
271 // * Add the new field, e.g. "int m_foo".
272 // * Add a setter function and a getter function, e.g. "void setFoo(int foo)" and "int getFoo() const { return m_foo; }".
273 // * Add the field to the private constructor. This is used to set the global default "_defaultControl".
274 // * Add the field to the public constructor (copying from _defaultControl) and to the copy constructor.
275 
276 // Sample usage:
277 // Concurrency::accelerator::default_accelerator
278 // bolt::amp::control ctl(Concurrency::accelerator(Concurrency::accelerator::default_accelerator));
279 // ctl.setDebug(bolt::amp::control::debug::ShowCode);
280 // bolt::amp::reduce(ctl, a.begin(), a.end(), std::plus<int>());
281 
282 
283 #endif