Bolt  1.3
C++ template library with support for OpenCL
bolt.h
Go to the documentation of this file.
1 /***************************************************************************
2 * © 2012,2014 Advanced Micro Devices, Inc. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 
16 ***************************************************************************/
17 
18 
23 #pragma once
24 #if !defined( BOLT_CL_BOLT_H )
25 #define BOLT_CL_BOLT_H
26 #define __CL_ENABLE_EXCEPTIONS
27 #define CL_USE_DEPRECATED_OPENCL_1_1_APIS
28 
29 #include <CL/cl.h>
30 /* To restrict the build to the OpenCL 1.1 specification, uncomment the following line. */
31 //#undef CL_VERSION_1_2
32 #include <CL/cl.hpp>
33 
34 
35 #include <string>
36 #include <map>
37 #include <boost/thread/mutex.hpp>
38 #include "bolt/BoltVersion.h"
39 #include "bolt/cl/control.h"
40 #include "bolt/cl/clcode.h"
41 
/* PUSH_BACK_UNIQUE( CONTAINER, ELEMENT )
 *
 * Appends ELEMENT to CONTAINER unless std::find already locates an equal
 * element between CONTAINER.begin() and CONTAINER.end().
 *
 * The test is written as "if( found ) {} else push_back" so that the
 * expansion always carries its own else branch: an `else` that follows the
 * macro in caller code pairs with the caller's `if`, never with the one
 * hidden inside the macro.  Both arguments are parenthesized so expression
 * arguments expand safely.
 *
 * CONTAINER is expanded three times and ELEMENT twice, so neither argument
 * should have side effects.  The trailing semicolon is kept as part of the
 * macro, so uses written with or without their own semicolon compile exactly
 * as before.  std::find is declared in <algorithm>.
 */
#define PUSH_BACK_UNIQUE(CONTAINER, ELEMENT) \
    if (std::find((CONTAINER).begin(), (CONTAINER).end(), (ELEMENT)) != (CONTAINER).end()) {} \
    else (CONTAINER).push_back(ELEMENT);
45 
68 namespace bolt {
69  namespace cl {
70 
71  extern const std::string binary_search_kernels;
72  extern const std::string copy_kernels;
73  extern const std::string count_kernels;
74  extern const std::string fill_kernels;
75  extern const std::string gather_kernels;
76  extern const std::string generate_kernels;
77  extern const std::string merge_kernels;
78  extern const std::string min_element_kernels;
79  extern const std::string reduce_kernels;
80  extern const std::string reduce_by_key_kernels;
81  extern const std::string scan_kernels;
82  extern const std::string scan_by_key_kernels;
83  extern const std::string scatter_kernels;
84  extern const std::string sort_kernels;
85  extern const std::string stablesort_kernels;
86  extern const std::string stablesort_by_key_kernels;
87  extern const std::string sort_uint_kernels;
88  extern const std::string sort_int_kernels;
89  extern const std::string sort_float_kernels;
90  extern const std::string sort_common_kernels;
91  extern const std::string sort_by_key_kernels;
92  extern const std::string sort_by_key_int_kernels;
93  extern const std::string sort_by_key_uint_kernels;
94  extern const std::string transform_kernels;
95  extern const std::string transform_reduce_kernels;
96  extern const std::string transform_scan_kernels;
97 
98  // transform_scan kernel names
99  //static std::string transform_scan_kernel_names_array[] = { "perBlockTransformScan", "intraBlockInclusiveScan", "perBlockAddition" };
100  //const std::vector<std::string> transformScanKernelNames(transform_scan_kernel_names_array, transform_scan_kernel_names_array+3);
101 
102  /******************************************************************
103  * Kernel Template Specialization
104  *****************************************************************/
/* KernelTemplateSpecializer
 *
 * Base class for kernel template specializers.  It stores an ordered list of
 * kernel names and exposes a virtual functor that receives a list of type
 * names and returns a string.  The base implementation of the functor only
 * returns an error text; derived classes are expected to override it.
 */
class KernelTemplateSpecializer
{
public:
    // Virtual destructor: the class is a polymorphic base, so destroying a
    // derived specializer through a base pointer must run the derived destructor.
    virtual ~KernelTemplateSpecializer() {}

    // kernel template specializer functor
    // typeNames: type names handed to the specializer (unused by this base version).
    // Returns the fixed error text below unless a derived class overrides it.
    virtual const ::std::string operator() (const ::std::vector< ::std::string >& typeNames) const
    { return "Error; virtual function not overloaded"; }

    // add a kernel name (appended at the end of the list)
    void addKernelName( const std::string& kernelName) { kernelNames.push_back(kernelName); }

    // get the name of a particular kernel
    // kernelIndex is used with operator[] and is not range checked.
    const ::std::string name( int kernelIndex ) const { return kernelNames[ kernelIndex ]; }

    // return number of kernels
    size_t numKernels() const { return kernelNames.size(); }

    // kernel vector (returned as a copy)
    const ::std::vector< ::std::string > getKernelNames() const { return kernelNames; }

public:
    // Kernel names in the order they were added.
    ::std::vector< ::std::string > kernelNames;
};
127 
128  class control;
129  //class KernelTemplateSpecializer;
130 
131  extern std::string fileToString(const std::string &fileName);
132 
133  /**********************************************************************
134  * getKernels
135  * returns vector of cl::Kernel objects either by constructing
136  * and compiling the kernels, or by returning the kernels if
137  * previously compiled.
138  * see bolt/cl/detail/scan.inl for example usage
139  **********************************************************************/
140  ::std::vector< ::cl::Kernel > getKernels(
141  const control& ctl,
142  const ::std::vector< ::std::string >& typeNames,
143  const KernelTemplateSpecializer * const kts,
144  const ::std::vector< ::std::string >& typeDefinitions,
145  const std::string& baseKernelString,
146  const std::string& compileOptions = ""
147  );
148 
155  void getVersion( cl_uint& major, cl_uint& minor, cl_uint& patch );
156 
161  std::string clErrorStringA( const cl_int& status );
162 
167  std::wstring clErrorStringW( const cl_int& status );
168 
175  inline cl_int V_OpenCL( cl_int res, const std::string& msg, size_t lineno )
176  {
177  switch( res )
178  {
179  case CL_SUCCESS:
180  break;
181  default:
182  {
183  std::string tmp;
184  tmp.append( "V_OpenCL< " );
185  tmp.append( clErrorStringA( res ) );
186  tmp.append( " >: " );
187  tmp.append( msg );
188  //std::cout << tmp << std::endl;
189  throw ::cl::Error( res, tmp.c_str( ) );
190  }
191  }
192 
193  return res;
194  }
195  #define V_OPENCL( status, message ) V_OpenCL( status, message, __LINE__ )
196 
197  void wait( const bolt::cl::control &ctl, ::cl::Event &e );
198 
199  /******************************************************************
200  * Program Map - so each kernel is only compiled once
201  *****************************************************************/
205  {
206  ::cl::Context context;
207  ::std::string device;
208  ::std::string compileOptions;
209  ::std::string kernelSource;
210  };
211 
213  {
214  ::cl::Program program;
215  };
216 
218  {
219  bool operator( )( const ProgramMapKey& lhs, const ProgramMapKey& rhs ) const
220  {
221  int comparison;
222  // context
223  // Do I really need to compare the context? Yes, required by OpenCL. -DT
224  if( lhs.context() < rhs.context() )
225  return true;
226  else if( lhs.context() > rhs.context() )
227  return false;
228  // else equal; compare using next element of key
229 
230  // device
231  comparison = lhs.device.compare(rhs.device);
232  //std::cout << "Compare Device: " << comparison << std::endl;
233  if( comparison < 0 )
234  {
235  return true;
236  }
237  else if( comparison > 0 )
238  {
239  return false;
240  }
241  // else equal; compare using next element of key
242 
243  // compileOptions
244  comparison = lhs.compileOptions.compare(rhs.compileOptions);
245  //std::cout << "Compare Options: " << comparison << std::endl;
246  if( comparison < 0 )
247  {
248  return true;
249  }
250  else if( comparison > 0 )
251  {
252  return false;
253  }
254  //else
255  // std::cout << "<" << lhs.compileOptions << "> == <" << rhs.compileOptions << ">" << std::endl;
256  // else equal; compare using next element of key
257 
258  // kernelSource
259  comparison = lhs.kernelSource.compare(rhs.kernelSource);
260  //std::cout << "Compare Source: " << comparison << std::endl;
261  if( comparison < 0 )
262  return true;
263  else if( comparison > 0 )
264  return false;
265  //else
266  // std::cout << "<lhs.kernelSource> == <rhs.kernelSource>" << std::endl;
267  // else equal; compare using next element of key
268 
269  // all elements equal
270  return false;
271  }
272  };
273 
274  typedef ::std::map< ProgramMapKey, ProgramMapValue, ProgramMapKeyComp > ProgramMap;
275  //typedef ::std::map< ::std::string, ProgramMapValue> ProgramMap;
276 
277  // declared in bolt.cpp
278  extern boost::mutex programMapMutex;
279  extern ProgramMap programMap;
280 
281  };
282 };
283 
/* ALIGNED( bound )
 *
 * Expands to the compiler-specific syntax for requesting an alignment of
 * `bound` bytes on a declaration.
 *
 * __declspec( align ) is a Microsoft compiler extension, so it is selected
 * by the compiler macro _MSC_VER instead of the target macro _WIN32; every
 * other compiler gets the GNU-style aligned attribute.
 * NOTE(review): this changes the expansion only for non-MSVC compilers that
 * target Windows (e.g. MinGW GCC) - confirm none of them rely on the
 * __declspec spelling.
 */
#if defined( _MSC_VER )
#define ALIGNED( bound ) __declspec( align( bound ) )
#else
#define ALIGNED( bound ) __attribute__ ( (aligned( bound ) ) )
#endif
289 
290 //Visual Studio 2012 is not able to map char to cl_char. Hence this typename is added.
291 BOLT_CREATE_TYPENAME( char );
292 
293 BOLT_CREATE_TYPENAME( cl_char );
294 BOLT_CREATE_TYPENAME( cl_uchar );
295 BOLT_CREATE_TYPENAME( cl_short );
296 BOLT_CREATE_TYPENAME( cl_ushort );
297 BOLT_CREATE_TYPENAME( cl_int );
298 BOLT_CREATE_TYPENAME( cl_uint );
299 BOLT_CREATE_TYPENAME( cl_long );
300 BOLT_CREATE_TYPENAME( cl_ulong );
301 BOLT_CREATE_TYPENAME( cl_float );
302 BOLT_CREATE_TYPENAME( cl_double );
303 
305 //BOLT_CREATE_TYPENAME( cl_int );
306 //BOLT_CREATE_CLCODE( cl_int, "int" );
307 //
308 //BOLT_CREATE_TYPENAME( cl_int2 );
309 //BOLT_CREATE_CLCODE( cl_int2, "int2" );
310 //
311 //BOLT_CREATE_TYPENAME( cl_int4 );
312 //BOLT_CREATE_CLCODE( cl_int4, "int4" );
313 //
314 //BOLT_CREATE_TYPENAME( cl_uint );
315 //BOLT_CREATE_CLCODE( cl_uint, "uint" );
316 //
317 //BOLT_CREATE_TYPENAME( cl_uint2 );
318 //BOLT_CREATE_CLCODE( cl_uint2, "uint2" );
319 //
320 //BOLT_CREATE_TYPENAME( cl_uint4 );
321 //BOLT_CREATE_CLCODE( cl_uint4, "uint4" );
322 //
323 //BOLT_CREATE_TYPENAME( cl_float );
324 //BOLT_CREATE_CLCODE( cl_float, "float" );
325 //
326 //BOLT_CREATE_TYPENAME( cl_float2 );
327 //BOLT_CREATE_CLCODE( cl_float2, "float2" );
328 //
329 //BOLT_CREATE_TYPENAME( cl_float4 );
330 //BOLT_CREATE_CLCODE( cl_float4, "float4" );
331 //
332 //BOLT_CREATE_TYPENAME( cl_double );
333 //BOLT_CREATE_CLCODE( cl_double, "double" );
334 //
335 //BOLT_CREATE_TYPENAME( cl_double2 );
336 //BOLT_CREATE_CLCODE( cl_double2, "double2" );
337 //
338 //BOLT_CREATE_TYPENAME( cl_double4 );
339 //BOLT_CREATE_CLCODE( cl_double4, "double4" );
340 
341 #endif