22 #if !defined( BOLT_CL_DEVICE_VECTOR_H )
23 #define BOLT_CL_DEVICE_VECTOR_H
26 #include <type_traits>
31 #include <boost/iterator/iterator_facade.hpp>
32 #include <boost/iterator/reverse_iterator.hpp>
33 #include <boost/shared_array.hpp>
59 :
public std::random_access_iterator_tag
71 template<
typename T >
// Functor that releases a host mapping of a container's device buffer.
// Calling it with a mapped host pointer enqueues an unmap of that pointer against the
// container's buffer on the container's command queue and then waits on the unmap event.
// NOTE(review): V_OPENCL is defined outside this excerpt; presumably it reports or throws on a
// non-success OpenCL status -- confirm.
77 template<
typename Container >
78 class UnMapBufferFunctor
// Container whose m_commQueue / m_devMemory are used for the unmap.
80 Container& m_Container;
84 UnMapBufferFunctor( Container& rhs ): m_Container( rhs )
87 void operator( )(
const void* pBuff )
89 ::cl::Event unmapEvent;
// The const is cast away because enqueueUnmapMemObject is handed a non-const void*.
91 V_OPENCL( m_Container.m_commQueue.enqueueUnmapMemObject( m_Container.m_devMemory, const_cast< void* >( pBuff ), NULL, &unmapEvent ),
92 "shared_ptr failed to unmap host memory back to device memory" );
// Wait for the unmap to finish before returning.
93 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
// Raw (non-owning) element pointers, used for host pointers returned by buffer mapping.
97 typedef T* naked_pointer;
98 typedef const T* const_naked_pointer;
// Container typedefs.
103 typedef T value_type;
104 typedef ptrdiff_t difference_type;
105 typedef difference_type distance_type;
// Note: size_type is a signed int here, not an unsigned type as in std::vector.
106 typedef int size_type;
// Reference-counted handles; data() constructs these with an UnMapBufferFunctor as deleter.
108 typedef boost::shared_array< value_type > pointer;
109 typedef boost::shared_array< const value_type > const_pointer;
// Proxy reference to a single element of the device buffer, identified by a container and an
// index. Each access maps exactly one element (byte offset m_Index * sizeof( value_type ),
// length sizeof( value_type )) into host memory, then unmaps it and waits for the unmap.
// NOTE(review): the class header and several body lines are not visible in this excerpt.
118 template<
typename Container >
122 reference_base(Container &rhs, size_type index ): m_Container( rhs ), m_Index( index )
// Read access: converts the proxy to a copy of the element value.
128 operator value_type( )
const
130 cl_int l_Error = CL_SUCCESS;
// Blocking map (second argument true) of the single element, for reading.
131 naked_pointer result =
reinterpret_cast< naked_pointer
>( m_Container.m_commQueue.enqueueMapBuffer(
132 m_Container.m_devMemory,
true, CL_MAP_READ, m_Index *
sizeof( value_type ),
sizeof( value_type ), NULL, NULL, &l_Error ) );
133 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
// Copy the value out while the mapping is still valid.
135 value_type valTmp = *result;
137 ::cl::Event unmapEvent;
138 V_OPENCL( m_Container.m_commQueue.enqueueUnmapMemObject( m_Container.m_devMemory, result, NULL, &unmapEvent ),
"device_vector failed to unmap host memory back to device memory" );
139 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
// Write access (function header not visible here): maps the element with
// CL_MAP_WRITE_INVALIDATE_REGION, then unmaps and waits. The statement that stores through
// `result` is not visible in this excerpt.
146 cl_int l_Error = CL_SUCCESS;
147 naked_pointer result =
reinterpret_cast< naked_pointer
>( m_Container.m_commQueue.enqueueMapBuffer(
148 m_Container.m_devMemory,
true, CL_MAP_WRITE_INVALIDATE_REGION, m_Index *
sizeof( value_type ),
sizeof( value_type ), NULL, NULL, &l_Error ) );
149 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
153 ::cl::Event unmapEvent;
154 V_OPENCL( m_Container.m_commQueue.enqueueUnmapMemObject( m_Container.m_devMemory, result, NULL, &unmapEvent ),
"device_vector failed to unmap host memory back to device memory" );
155 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
// Second write path (header not visible): `rhs` is first converted to a value_type and held in
// `value`, then the destination element is mapped for writing, unmapped and waited on as above.
165 cl_int l_Error = CL_SUCCESS;
166 value_type value =
static_cast<value_type
>(rhs);
167 naked_pointer result =
reinterpret_cast< naked_pointer
>( m_Container.m_commQueue.enqueueMapBuffer(
168 m_Container.m_devMemory,
true, CL_MAP_WRITE_INVALIDATE_REGION, m_Index *
sizeof( value_type ),
sizeof( value_type ), NULL, NULL, &l_Error ) );
169 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
173 ::cl::Event unmapEvent;
174 V_OPENCL( m_Container.m_commQueue.enqueueUnmapMemObject( m_Container.m_devMemory, result, NULL, &unmapEvent ),
"device_vector failed to unmap host memory back to device memory" );
175 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
// Accessor for the element index (body not visible in this excerpt).
187 size_type getIndex()
const
// Container the proxy refers into.
193 Container& m_Container;
// Iterator over a device_vector, built on boost::iterator_facade. It stores a reference to the
// container plus an element index; dereferencing returns m_Container[ m_Index ].
// NOTE(review): many body lines (braces, return statements, the Payload declaration) are not
// visible in this excerpt; comments below describe only what the visible lines show.
221 template<
typename Container >
222 class iterator_base:
public boost::iterator_facade< iterator_base< Container >, value_type, device_vector_tag,
223 typename device_vector::reference, int >
226 typedef typename boost::iterator_facade< iterator_base< Container >, value_type,
device_vector_tag,
// Presumably the members of the Payload struct initialised in gpuPayload() below -- the
// enclosing declaration line is not visible; confirm.
241 difference_type m_Index;
242 difference_type m_Ptr1[ 3 ];
// Default construction starts at index 0.
247 iterator_base( ): m_Container( getContainer() ), m_Index( 0 )
// Construct an iterator into `rhs` positioned at `index`.
251 iterator_base( Container& rhs, difference_type index ): m_Container( rhs ), m_Index( index )
// Converting constructor from an iterator over another container type; copies the container
// reference and the index.
255 template<
typename OtherContainer >
256 iterator_base(
const iterator_base< OtherContainer >& rhs ): m_Container( rhs.m_Container ), m_Index( rhs.m_Index )
259 iterator_base( value_type *ptr ): m_Container( ptr ), m_Index( 0 )
264 iterator_base< Container >& operator = (
const iterator_base< Container >& rhs )
// NOTE(review): m_Container is declared as `Container&` at the end of this class, so this
// statement assigns through the reference (i.e. to the referenced container) rather than
// rebinding the iterator to rhs's container -- confirm this is intended.
266 m_Container = rhs.m_Container;
267 m_Index = rhs.m_Index;
271 iterator_base< Container > & base()
276 const iterator_base< Container > & base()
const
281 iterator_base< Container > & operator+= (
const difference_type & n )
287 iterator_base< Container >& operator = (
const difference_type & n )
295 const iterator_base< Container > operator + (
const difference_type & n )
const
// Work on a copy so that *this is left unchanged.
297 iterator_base< Container > result(*
this);
302 Container& getContainer( )
const
// Binds the container's underlying ::cl::Buffer to kernel argument `arg_num`.
307 int setKernelBuffers(
int arg_num, ::cl::Kernel &kernel)
const
309 const ::cl::Buffer &buffer = getContainer().getBuffer();
310 kernel.setArg(arg_num, buffer );
// Builds the Payload for this iterator: the current index followed by three zeroed slots.
317 const Payload gpuPayload( )
const
319 Payload payload = { m_Index, { 0, 0, 0 } };
// Computes a payload size from sizeof( difference_type ) plus the device pointer width in
// bytes (CL_DEVICE_ADDRESS_BITS >> 3); a special case for 64-bit devices follows but its body
// is not visible here.
326 const difference_type gpuPayloadSize( )
const
328 cl_int l_Error = CL_SUCCESS;
329 ::cl::Device which_device;
// Ask the command queue which device it targets.
330 l_Error = m_Container.m_commQueue.getInfo(CL_QUEUE_DEVICE,&which_device );
332 cl_uint deviceBits = which_device.getInfo< CL_DEVICE_ADDRESS_BITS >( );
335 difference_type payloadSize =
sizeof( difference_type ) + ( deviceBits >> 3 );
338 if( deviceBits == 64 )
// Current element index of this iterator.
345 difference_type m_Index;
// iterator_facade hook: distance from this iterator to `rhs`, as a difference of indices.
346 difference_type distance_to(
const iterator_base< Container >& rhs )
const
348 return static_cast< difference_type
>( rhs.m_Index - m_Index );
// Grants boost::iterator_facade access to the core hooks (advance, equal, distance_to, ...).
355 friend class boost::iterator_core_access;
// All iterator_base instantiations are friends so the converting constructor and equal() can
// read each other's members.
361 template <
typename >
friend class iterator_base;
363 void advance( difference_type n )
// iterator_facade hook: two iterators are equal when they have the same index and refer to the
// same container object (compared by address).
378 template<
typename OtherContainer >
379 bool equal(
const iterator_base< OtherContainer >& rhs )
const
381 bool sameIndex = rhs.m_Index == m_Index;
382 bool sameContainer = (&m_Container == &rhs.m_Container );
384 return ( sameIndex && sameContainer );
// Dereference: delegates to the container's operator[].
389 return m_Container[ m_Index ];
// Container being iterated.
392 Container& m_Container;
// Reverse iterator over a device_vector, built on boost::iterator_facade with the standard
// random-access tag. Like iterator_base it holds a container reference and an index.
// NOTE(review): function headers and most bodies are not visible in this excerpt.
405 template<
typename Container >
406 class reverse_iterator_base:
public boost::iterator_facade< reverse_iterator_base< Container >, value_type, std::random_access_iterator_tag, typename device_vector::reference, int >
415 template<
typename OtherContainer >
// NOTE(review): m_Container is a `Container&` member (declared below), so this assigns
// through the reference rather than rebinding it -- confirm this is intended.
423 m_Container = lhs.m_Container;
424 m_Index = lhs.m_Index;
// Code guarded here is compiled only for non-Windows x86-64 builds (guarded lines not visible).
440 #if !defined(_WIN32) && defined(__x86_64__)
// Distance is this index minus lhs's index -- the opposite sign from iterator_base::distance_to.
463 return static_cast< difference_type
>( m_Index - lhs.m_Index );
// Grants boost::iterator_facade access to the core hooks.
468 friend class boost::iterator_core_access;
476 void advance( difference_type n )
// Equality: same index and same container object (compared by address).
492 template<
typename OtherContainer >
495 bool sameIndex = lhs.m_Index == m_Index;
496 bool sameContainer = (&m_Container == &lhs.m_Container );
498 return ( sameIndex && sameContainer );
// Dereference: delegates to the container's operator[].
503 return m_Container[ m_Index ];
// Container being iterated.
506 Container& m_Container;
// (Fragment of a constructor whose header is not visible.) Polymorphic element types are rejected
// at compile time.
534 static_assert( !std::is_polymorphic< value_type >::value,
"AMD C++ template extensions do not support the virtual keyword yet" );
// Fill constructor: creates a device buffer holding `newSize` elements and fills it with `value`.
//   newSize - number of elements.
//   value   - fill value (default-constructed value_type by default).
//   flags   - OpenCL buffer creation flags.
//   init    - NOTE(review): its use is not visible in this excerpt; presumably gates the fill.
//   ctl     - control object supplying the command queue.
547 device_vector( size_type newSize,
const value_type& value = value_type( ), cl_mem_flags flags = CL_MEM_READ_WRITE,
548 bool init =
true,
const control& ctl =
control::getDefault( ) ): m_Size( newSize ), m_commQueue( ctl.getCommandQueue( ) ), m_Flags( flags )
550 static_assert( !std::is_polymorphic< value_type >::value,
"AMD C++ template extensions do not support the virtual keyword yet" );
// The buffer is created in the context that owns the command queue.
553 cl_int l_Error = CL_SUCCESS;
554 ::cl::Context l_Context = m_commQueue.getInfo< CL_QUEUE_CONTEXT >( &l_Error );
555 V_OPENCL( l_Error,
"device_vector failed to query for the context of the ::cl::CommandQueue object" );
559 m_devMemory = ::cl::Buffer( l_Context, m_Flags, m_Size *
sizeof( value_type ) );
563 std::vector< ::cl::Event > fillEvent( 1 );
571 size_t sizeDS =
sizeof(value_type);
// True when sizeof( value_type ) is a power of two; in that case the fill is done on the
// device with enqueueFillBuffer.
573 if( !( sizeDS & (sizeDS - 1 ) ) )
575 V_OPENCL( m_commQueue.enqueueFillBuffer< value_type >( m_devMemory, value, 0,
576 newSize *
sizeof( value_type ), NULL, &fillEvent.front( ) ),
577 "device_vector failed to fill the internal buffer with the requested pattern");
// Alternative path (presumably the else branch -- the `else` line is not visible): map the
// buffer to the host, fill it with std::fill_n, and unmap it again.
582 ::cl::Event fill_mapEvent;
583 value_type *host_buffer = ( value_type* )ctl.getCommandQueue( ).enqueueMapBuffer (
586 CL_MAP_READ | CL_MAP_WRITE,
588 sizeof( value_type )*newSize,
593 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
594 bolt::cl::wait( ctl, fill_mapEvent );
598 std::fill_n( stdext::make_checked_array_iterator( host_buffer, newSize ),
// The unmap signals fillEvent, the same event the device-side fill path uses.
609 l_Error = ctl.getCommandQueue( ).enqueueUnmapMemObject( m_devMemory,
612 &fillEvent.front( ) );
613 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
// Exceptions from the fill are caught and written to std::cout.
617 catch( std::exception& e )
619 std::cout <<
"device_vector enqueueFillBuffer error condition reported:" << std::endl << e.what() << std::endl;
// Enqueue a wait on the fill/unmap event.
626 V_OPENCL( m_commQueue.enqueueWaitForEvents( fillEvent ),
"device_vector failed to wait for an event" );
628 catch( std::exception& e )
630 std::cout <<
"device_vector enqueueFillBuffer enqueueWaitForEvents error condition reported:" << std::endl << e.what() << std::endl;
// Constructs a device_vector of `newSize` elements from the sequence starting at `begin`.
//   begin   - iterator to the first source element (overload disabled for integral types).
//   newSize - number of elements to take from `begin`.
//   flags   - buffer creation flags; by default the buffer uses the host memory at &*begin.
// NOTE(review): the parameter line declaring `ctl` is not visible in this excerpt.
649 template<
typename InputIterator >
650 device_vector(
const InputIterator
begin, size_type newSize, cl_mem_flags flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR,
652 typename std::enable_if< !std::is_integral< InputIterator >::value >::type* = 0 ): m_Size( newSize ),
653 m_commQueue( ctl.getCommandQueue( ) ), m_Flags( flags )
655 static_assert( std::is_convertible< value_type,
typename std::iterator_traits< InputIterator >::value_type >::value,
656 "iterator value_type does not convert to device_vector value_type" );
657 static_assert( !std::is_polymorphic< value_type >::value,
"AMD C++ template extensions do not support the virtual keyword yet" );
// The buffer is created in the context that owns the command queue.
665 cl_int l_Error = CL_SUCCESS;
666 ::cl::Context l_Context = m_commQueue.getInfo< CL_QUEUE_CONTEXT >( &l_Error );
667 V_OPENCL( l_Error,
"device_vector failed to query for the context of the ::cl::CommandQueue object" );
// With CL_MEM_USE_HOST_PTR the buffer is created directly over the caller's memory.
669 if( m_Flags & CL_MEM_USE_HOST_PTR )
671 m_devMemory = ::cl::Buffer( l_Context, m_Flags, m_Size *
sizeof( value_type ),
672 reinterpret_cast< value_type* >( const_cast< value_type* >( &*begin ) ) );
// Otherwise a fresh buffer is allocated and the source elements are copied in through a mapping.
676 m_devMemory = ::cl::Buffer( l_Context, m_Flags, m_Size *
sizeof( value_type ) );
680 size_t byteSize = m_Size *
sizeof( value_type );
// Blocking map for writing; the third argument takes map flags (cl_map_flags), not
// buffer-creation flags.
684 naked_pointer pointer =
static_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer(
685 m_devMemory, CL_TRUE, CL_MAP_WRITE, 0, byteSize, 0, 0, &l_Error) );
686 V_OPENCL( l_Error,
"enqueueMapBuffer failed in device_vector constructor" );
// Two variants of the same copy: a checked-iterator form and a plain form. Presumably they
// are selected by a preprocessor conditional whose lines are not visible here.
688 std::copy( begin, begin + m_Size, stdext::checked_array_iterator< naked_pointer >( pointer, m_Size ) );
690 std::copy( begin, begin + m_Size, pointer );
692 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, pointer, 0, 0 );
693 V_OPENCL( l_Error,
"enqueueUnmapMemObject failed in device_vector constructor" );
// Range constructor: builds a device_vector from the elements in [begin, end).
// NOTE(review): the line carrying the parameter list (begin, end, flags, ctl) is not visible in
// this excerpt.
705 template<
typename InputIterator >
707 typename std::enable_if< !std::is_integral< InputIterator >::value >::type* = 0 ): m_commQueue( ctl.getCommandQueue( ) ), m_Flags( flags )
709 static_assert( std::is_convertible< value_type,
typename std::iterator_traits< InputIterator >::value_type >::value,
710 "iterator value_type does not convert to device_vector value_type" );
711 static_assert( !std::is_polymorphic< value_type >::value,
"AMD C++ template extensions do not support the virtual keyword yet" );
// The buffer is created in the context that owns the command queue.
714 cl_int l_Error = CL_SUCCESS;
715 ::cl::Context l_Context = m_commQueue.getInfo< CL_QUEUE_CONTEXT >( &l_Error );
716 V_OPENCL( l_Error,
"device_vector failed to query for the context of the ::cl::CommandQueue object" );
// Element count is the length of the input range.
718 m_Size =
static_cast< size_type
>( std::distance( begin, end ) );
724 size_t byteSize = m_Size *
sizeof( value_type );
// With CL_MEM_USE_HOST_PTR the buffer is created directly over the caller's memory.
726 if( m_Flags & CL_MEM_USE_HOST_PTR )
728 m_devMemory = ::cl::Buffer( l_Context, m_Flags, byteSize,
729 reinterpret_cast< value_type* >( const_cast< value_type* >( std::addressof(*(begin) ) ) ) );
// Otherwise a fresh buffer is allocated and the range is copied in through a mapping.
734 m_devMemory = ::cl::Buffer( l_Context, m_Flags, byteSize );
// Blocking map for writing; the third argument takes map flags (cl_map_flags), not
// buffer-creation flags.
738 naked_pointer pointer =
static_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer(
739 m_devMemory, CL_TRUE, CL_MAP_WRITE, 0, byteSize, 0, 0, &l_Error) );
740 V_OPENCL( l_Error,
"enqueueMapBuffer failed in device_vector constructor" );
742 std::copy( begin, end, stdext::checked_array_iterator< naked_pointer >( pointer, m_Size ) );
746 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, pointer, 0, 0 );
747 V_OPENCL( l_Error,
"enqueueUnmapMemObject failed in device_vector constructor" );
// (Fragment of a constructor whose header is not visible in this excerpt.)
// Polymorphic element types are rejected at compile time.
759 static_assert( !std::is_polymorphic< value_type >::value,
"AMD C++ template extensions do not support the virtual keyword yet" );
// m_Flags is taken from the creation flags reported by the ::cl::Buffer already held in
// m_devMemory.
763 cl_int l_Error = CL_SUCCESS;
764 m_Flags = m_devMemory.getInfo< CL_MEM_FLAGS >( &l_Error );
765 V_OPENCL( l_Error,
"device_vector failed to query for the memory flags of the ::cl::Buffer object" );
// (Fragment; the enclosing function header is not visible in this excerpt.)
// Device-to-device copy of m_Size elements from rhs's buffer into this buffer, starting at
// offset 0 in both, followed by a wait on the copy event.
777 size_type l_srcSize = m_Size *
sizeof( value_type );
778 ::cl::Event copyEvent;
780 cl_int l_Error = CL_SUCCESS;
781 l_Error = m_commQueue.enqueueCopyBuffer( rhs.m_devMemory, m_devMemory, 0, 0, l_srcSize, NULL, &copyEvent );
782 V_OPENCL( l_Error,
"device_vector failed to copy data inside of operator=()" );
783 V_OPENCL( copyEvent.wait( ),
"device_vector failed to wait for copy event" );
// (Fragment; the function header and the lines that size this vector are not visible here.)
// Adopt the source's flags and command queue.
791 m_Flags = rhs.m_Flags;
792 m_commQueue = rhs.m_commQueue;
// Device-to-device copy of m_Size elements from rhs's buffer, then wait for it to complete.
801 size_type l_srcSize = m_Size *
sizeof( value_type );
802 ::cl::Event copyEvent;
804 cl_int l_Error = CL_SUCCESS;
805 l_Error = m_commQueue.enqueueCopyBuffer( rhs.m_devMemory, m_devMemory, 0, 0, l_srcSize, NULL, &copyEvent );
806 V_OPENCL( l_Error,
"device_vector failed to copy data inside of operator=()" );
807 V_OPENCL( copyEvent.wait( ),
"device_vector failed to wait for copy event" );
// Changes the number of elements to `reqSize` by allocating a new buffer, copying over existing
// contents and filling newly added elements with `val`.
//   reqSize - requested element count.
//   val     - value for elements added by growth.
// Throws ::cl::Error( CL_MEM_OBJECT_ALLOCATION_FAILURE ) when the buffer was created with
// CL_MEM_USE_HOST_PTR, or when the request is too large (that condition is not visible here).
// NOTE(review): braces, else lines and some arguments are missing from this excerpt; branch
// structure described below is inferred from the visible conditions -- confirm against the file.
826 void resize( size_type reqSize,
const value_type& val = value_type( ) )
// Memory supplied by the caller cannot be reallocated by this class.
828 if( (m_Flags & CL_MEM_USE_HOST_PTR) != 0 )
830 throw ::cl::Error( CL_MEM_OBJECT_ALLOCATION_FAILURE ,
831 "A device_vector can not resize() memory not under its direct control" );
840 throw ::cl::Error( CL_MEM_OBJECT_ALLOCATION_FAILURE ,
841 "The amount of memory requested exceeds what is available" );
843 cl_int l_Error = CL_SUCCESS;
845 ::cl::Context l_Context = m_commQueue.getInfo< CL_QUEUE_CONTEXT >( &l_Error );
846 V_OPENCL( l_Error,
"device_vector failed to query for the context of the ::cl::Buffer object" );
// Sizes in bytes of the requested and the current contents.
848 size_type l_reqSize = reqSize *
sizeof( value_type );
849 ::cl::Buffer l_tmpBuffer( l_Context, m_Flags, l_reqSize, NULL, &l_Error );
851 size_type l_srcSize = m_Size *
sizeof( value_type );
// Growing: copy the old contents, then fill the newly added tail with `val`.
857 if( l_reqSize > l_srcSize )
859 std::vector< ::cl::Event > copyEvent( 1 );
860 l_Error = m_commQueue.enqueueCopyBuffer( m_devMemory,
866 &copyEvent.front( ) );
867 V_OPENCL( l_Error,
"device_vector failed to copy data to the new ::cl::Buffer object" );
868 ::cl::Event fillEvent;
870 size_t sizeDS =
sizeof(value_type);
// Device-side fill is used when sizeof( value_type ) is a power of two.
871 if( !( sizeDS & (sizeDS - 1 ) ) )
873 l_Error = m_commQueue.enqueueFillBuffer< value_type >( l_tmpBuffer,
876 (l_reqSize - l_srcSize),
879 V_OPENCL( l_Error,
"device_vector failed to fill the new data with the provided pattern" );
// Otherwise the tail is mapped to the host, filled with std::fill_n and unmapped.
885 ::cl::Event fill_mapEvent;
886 value_type *host_buffer = ( value_type* )m_commQueue.enqueueMapBuffer (
889 CL_MAP_READ | CL_MAP_WRITE,
891 (l_reqSize - l_srcSize),
896 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
897 fill_mapEvent.wait( );
901 std::fill_n( stdext::make_checked_array_iterator( host_buffer , reqSize ),
912 l_Error = m_commQueue.enqueueUnmapMemObject( l_tmpBuffer,
916 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
919 l_Error = fillEvent.wait( );
920 V_OPENCL( l_Error,
"device_vector failed to wait for fill event" );
// Not growing: copy only the first l_reqSize bytes of the old contents into the new buffer.
924 std::vector< ::cl::Event > copyEvent( 1 );
925 l_Error = m_commQueue.enqueueCopyBuffer( m_devMemory, l_tmpBuffer, 0, 0, l_reqSize, NULL, &copyEvent.front( ) );
926 V_OPENCL( l_Error,
"device_vector failed to copy data to the new ::cl::Buffer object" );
928 l_Error = m_commQueue.enqueueWaitForEvents( copyEvent );
929 V_OPENCL( l_Error,
"device_vector failed to wait for copy event" );
// Fill of the whole new buffer from offset 0 (presumably the case where there is no old
// content to copy -- the guarding condition is not visible here).
934 ::cl::Event fillEvent;
935 size_t sizeDS =
sizeof(value_type);
936 if( !( sizeDS & (sizeDS - 1 ) ) )
938 l_Error = m_commQueue.enqueueFillBuffer< value_type >( l_tmpBuffer, val, 0, l_reqSize, NULL, &fillEvent );
939 V_OPENCL( l_Error,
"device_vector failed to fill the new data with the provided pattern" );
945 ::cl::Event fill_mapEvent;
946 value_type *host_buffer = ( value_type* )m_commQueue.enqueueMapBuffer (
949 CL_MAP_READ | CL_MAP_WRITE,
956 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
957 fill_mapEvent.wait( );
961 std::fill_n( stdext::make_checked_array_iterator( host_buffer , reqSize ),
971 l_Error = m_commQueue.enqueueUnmapMemObject( l_tmpBuffer,
975 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
979 l_Error = fillEvent.wait( );
980 V_OPENCL( l_Error,
"device_vector failed to wait for fill event" );
// The new buffer replaces the old one.
987 m_devMemory = l_tmpBuffer;
// (Function header not visible in this excerpt.)
// Returns the largest element count that fits in a single allocation on the queue's device:
// CL_DEVICE_MAX_MEM_ALLOC_SIZE divided by sizeof( value_type ).
1004 cl_int l_Error = CL_SUCCESS;
1006 ::cl::Device l_Device = m_commQueue.getInfo< CL_QUEUE_DEVICE >( &l_Error );
1007 V_OPENCL( l_Error,
"device_vector failed to query for the device of the command queue" );
1009 cl_ulong l_MaxSize = l_Device.getInfo< CL_DEVICE_MAX_MEM_ALLOC_SIZE >( &l_Error );
1010 V_OPENCL( l_Error,
"device_vector failed to query device for the maximum memory size" );
// The cl_ulong quotient is narrowed to size_type, which is a signed int in this class.
1012 return static_cast< size_type
>( l_MaxSize /
sizeof( value_type ) );
// (Function header and branch conditions are not visible in this excerpt.)
// Allocates a buffer large enough for `reqSize` elements and makes it the vector's storage,
// carrying over the old buffer's contents when there is an old buffer to copy from.
// Throws ::cl::Error( CL_MEM_OBJECT_ALLOCATION_FAILURE ) when the request is too large (the
// condition itself is not visible here).
1030 throw ::cl::Error( CL_MEM_OBJECT_ALLOCATION_FAILURE ,
"The amount of memory requested exceeds what is available" );
1033 cl_int l_Error = CL_SUCCESS;
1034 ::cl::Context l_Context = m_commQueue.getInfo< CL_QUEUE_CONTEXT >( &l_Error );
1035 V_OPENCL( l_Error,
"device_vector failed to query for the context of the ::cl::CommandQueue object" );
// First path: simply install a freshly allocated buffer (presumably taken when there is no
// existing content -- the guarding condition is not visible).
1039 ::cl::Buffer l_tmpBuffer( l_Context, m_Flags, reqSize *
sizeof( value_type ) );
1040 m_devMemory = l_tmpBuffer;
// Second path: allocate, copy the whole old buffer (its CL_MEM_SIZE in bytes) into the new
// one, wait for the copy, then install the new buffer.
1044 size_type l_size = reqSize *
sizeof( value_type );
1046 ::cl::Buffer l_tmpBuffer( l_Context, m_Flags, l_size, NULL, &l_Error );
1047 V_OPENCL( l_Error,
"device_vector can not create an temporary internal OpenCL buffer" );
1049 size_type l_srcSize =
static_cast<size_type
> (m_devMemory.getInfo< CL_MEM_SIZE >( &l_Error ) );
1050 V_OPENCL( l_Error,
"device_vector failed to request the size of the ::cl::Buffer object" );
1052 ::cl::Event copyEvent;
1053 V_OPENCL( m_commQueue.enqueueCopyBuffer( m_devMemory, l_tmpBuffer, 0, 0, l_srcSize, NULL, &copyEvent ),
1054 "device_vector failed to copy from buffer to buffer " );
1057 V_OPENCL( copyEvent.wait( ),
"device_vector failed to wait on an event object" );
1060 m_devMemory = l_tmpBuffer;
// (Function header not visible in this excerpt.)
// Returns how many elements the current device buffer can hold: the buffer's CL_MEM_SIZE in
// bytes divided by sizeof( value_type ).
1070 size_t l_memSize = 0;
1071 cl_int l_Error = CL_SUCCESS;
// A null cl_mem handle means no buffer has been allocated; the statement taken in that case is
// not visible here.
1076 if(m_devMemory() == NULL)
1079 l_memSize = m_devMemory.getInfo< CL_MEM_SIZE >( &l_Error );
1080 V_OPENCL( l_Error,
"device_vector failed to request the size of the ::cl::Buffer object" );
1081 return static_cast< size_type
>( l_memSize /
sizeof( value_type ) );
// (Function header and the guarding condition of the throw are not visible in this excerpt.)
// Reallocates the device buffer so that it holds exactly m_Size elements, copying the current
// contents into the new buffer.
1094 throw ::cl::Error( CL_MEM_OBJECT_ALLOCATION_FAILURE ,
"device_vector size can not be greater than capacity( )" );
1100 cl_int l_Error = CL_SUCCESS;
1101 ::cl::Context l_Context = m_commQueue.getInfo< CL_QUEUE_CONTEXT >( &l_Error );
1102 V_OPENCL( l_Error,
"device_vector failed to query for the context of the ::cl::CommandQueue object" );
// New buffer sized for exactly m_Size elements.
1104 size_type l_newSize = m_Size *
sizeof( value_type );
1105 ::cl::Buffer l_tmpBuffer( l_Context, m_Flags, l_newSize, NULL, &l_Error );
1106 V_OPENCL( l_Error,
"device_vector can not create an temporary internal OpenCL buffer" );
// The old buffer's byte size is queried, but the copy below transfers only l_newSize bytes.
1109 size_type l_srcSize =
static_cast< size_type
>( m_devMemory.getInfo< CL_MEM_SIZE >( &l_Error ) );
1110 V_OPENCL( l_Error,
"device_vector failed to request the size of the ::cl::Buffer object" );
1112 std::vector< ::cl::Event > copyEvent( 1 );
1113 l_Error = m_commQueue.enqueueCopyBuffer( m_devMemory, l_tmpBuffer, 0, 0, l_newSize, NULL, &copyEvent.front( ) );
1114 V_OPENCL( l_Error,
"device_vector failed to copy data to the new ::cl::Buffer object" );
1117 l_Error = m_commQueue.enqueueWaitForEvents( copyEvent );
1118 V_OPENCL( l_Error,
"device_vector failed to wait for copy event" );
// The smaller buffer replaces the old one.
1121 m_devMemory = l_tmpBuffer;
// (Function header and the statement that reads through ptrBuff are not visible in this excerpt.)
// Read access to element `n`: a blocking read-only map of that single element, followed by an
// unmap and a wait for the unmap to complete.
1138 cl_int l_Error = CL_SUCCESS;
// Map sizeof( value_type ) bytes starting at byte offset n * sizeof( value_type ).
1140 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ, n *
sizeof( value_type),
sizeof( value_type), NULL, NULL, &l_Error ) );
1141 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
1145 ::cl::Event unmapEvent;
1146 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuff, NULL, &unmapEvent );
1147 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1148 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
// (Return statements of several accessors; their function headers are not visible here.)
// Past-the-end iterator: positioned at index m_Size of this container.
1217 return iterator( *
this, static_cast< typename iterator::difference_type >( m_Size ) );
// Const past-the-end iterators, likewise positioned at index m_Size.
1226 return const_iterator( *
this, static_cast< typename iterator::difference_type >( m_Size ) );
1236 return const_iterator( *
this, static_cast< typename iterator::difference_type >( m_Size ) );
// Last element: dereference the iterator one before end().
1291 return ( *(
end() - 1) );
1299 return ( *(
end() - 1) );
// Maps the whole device buffer (capacity() elements, from offset 0) into host memory for
// reading and writing and wraps the host pointer in a boost::shared_array whose deleter is an
// UnMapBufferFunctor bound to this container.
// NOTE(review): the return statement is not visible in this excerpt.
1302 pointer data(
void )
1309 cl_int l_Error = CL_SUCCESS;
// Blocking map (second argument true).
1311 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ | CL_MAP_WRITE,
1312 0,
capacity() *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1314 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
1316 pointer sp( ptrBuff, UnMapBufferFunctor< device_vector< value_type > >( *
this ) );
// Const overload: same as above, but the buffer is mapped read-only and the handle points to
// const elements.
1321 const_pointer data(
void )
const
1323 cl_int l_Error = CL_SUCCESS;
1325 const_naked_pointer ptrBuff =
reinterpret_cast< const_naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ,
1326 0,
capacity() *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1327 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
1329 const_pointer sp( ptrBuff, UnMapBufferFunctor<
const device_vector< value_type > >( *
this ) );
// The vector is empty exactly when its element count is zero.
1354 return ( m_Size == 0 );
// (Function header, the guarding condition of the throw, any capacity growth, the store through
// `result` and the size update are not visible in this excerpt.)
1363 throw ::cl::Error( CL_MEM_OBJECT_ALLOCATION_FAILURE ,
"device_vector size can not be greater than capacity( )" );
1372 cl_int l_Error = CL_SUCCESS;
// Blocking map of the single element slot at index m_Size, i.e. one past the current last
// element; CL_MAP_WRITE_INVALIDATE_REGION is requested for the mapping.
1374 naked_pointer result =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_WRITE_INVALIDATE_REGION,
1375 m_Size *
sizeof( value_type),
sizeof( value_type ), NULL, NULL, &l_Error ) );
1376 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for push_back" );
// Unmap and wait for the unmap to complete.
1379 ::cl::Event unmapEvent;
1380 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, result, NULL, &unmapEvent );
1381 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1382 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
// Exchanges all state with `vec`, one member at a time through a temporary copy.
// Device buffer handle.
1405 ::cl::Buffer heldBuffer = m_devMemory;
1406 m_devMemory = vec.m_devMemory;
1407 vec.m_devMemory = heldBuffer;
// Command queue.
1409 ::cl::CommandQueue heldQueue = m_commQueue;
1410 m_commQueue = vec.m_commQueue;
1411 vec.m_commQueue = heldQueue;
// Element count.
1413 size_type heldSize = m_Size;
1414 m_Size = vec.m_Size;
1415 vec.m_Size = heldSize;
// Buffer creation flags.
1417 cl_mem_flags heldFlags = m_Flags;
1418 m_Flags = vec.m_Flags;
1419 vec.m_Flags = heldFlags;
// (Single-element erase; the function header, the declaration of l_End and the size update are
// not visible in this excerpt.)
// Throws ::cl::Error( CL_INVALID_ARG_VALUE ) if `index` belongs to another container and
// ::cl::Error( CL_INVALID_ARG_INDEX ) if it is at or beyond l_End.
1428 if( &index.m_Container !=
this )
1429 throw ::cl::Error( CL_INVALID_ARG_VALUE ,
"Iterator is not from this container" );
1432 if( index.m_Index >= l_End.m_Index )
1433 throw ::cl::Error( CL_INVALID_ARG_INDEX ,
"Iterator is pointing past the end of this container" );
// Number of elements from the erased position up to l_End.
1435 size_type sizeRegion = l_End.m_Index - index.m_Index;
1437 cl_int l_Error = CL_SUCCESS;
// Map just that tail region; ptrBuff[ 0 ] is the element being erased.
1438 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ | CL_MAP_WRITE,
1439 index.m_Index *
sizeof( value_type ), sizeRegion *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1440 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
// Shift the following elements down by one slot, overwriting the erased element.
1442 ::memmove( ptrBuff, ptrBuff + 1, (sizeRegion - 1)*
sizeof( value_type ) );
1444 ::cl::Event unmapEvent;
1445 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuff, NULL, &unmapEvent );
1446 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1447 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
// The returned iterator's index is clamped so it never exceeds m_Size.
1451 size_t newIndex = (m_Size < index.m_Index) ? m_Size : index.m_Index;
1452 return iterator( *
this, static_cast< difference_type >( (
int)newIndex ) );
// (Range erase of [first, last); the function header, the declaration of l_End and part of the
// erase-everything branch are not visible in this excerpt.)
// Throws ::cl::Error( CL_INVALID_ARG_VALUE ) if either iterator belongs to another container
// and ::cl::Error( CL_INVALID_ARG_INDEX ) if `last` lies beyond the current size.
// Both iterators must refer to this container; one foreign iterator is enough to reject the call.
1462 if(( &first.m_Container !=
this ) || ( &last.m_Container !=
this ) )
1463 throw ::cl::Error( CL_INVALID_ARG_VALUE ,
"Iterator is not from this container" );
1465 if( last.m_Index > m_Size )
1466 throw ::cl::Error( CL_INVALID_ARG_INDEX ,
"Iterator is pointing past the end of this container" );
// Erasing the whole range needs no data movement; an iterator at index m_Size is returned.
1468 if( (first ==
begin( )) && (last ==
end( )) )
1471 return iterator( *
this, static_cast< typename iterator::difference_type >( m_Size ) );
// Number of elements from `first` up to l_End; this is the region mapped to the host.
1475 size_type sizeMap = l_End.m_Index - first.m_Index;
1477 cl_int l_Error = CL_SUCCESS;
1478 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ | CL_MAP_WRITE,
1479 first.m_Index *
sizeof( value_type ), sizeMap *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1480 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
// Slide the elements that follow the erased range down over it.
1482 size_type sizeErase = last.m_Index - first.m_Index;
1483 ::memmove( ptrBuff, ptrBuff + sizeErase, (sizeMap - sizeErase)*
sizeof( value_type ) );
1485 ::cl::Event unmapEvent;
1486 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuff, NULL, &unmapEvent );
1487 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1488 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
1490 m_Size -= sizeErase;
// The returned iterator's index is clamped so it never exceeds the new size.
1492 size_type newIndex = (m_Size < last.m_Index) ? m_Size : last.m_Index;
1493 return iterator( *
this, static_cast< typename iterator::difference_type >( newIndex ) );
// (Single-element insert; the function header, any capacity growth, the store of the new value
// and the size update are not visible in this excerpt.)
// Throws ::cl::Error( CL_INVALID_ARG_VALUE ) if `index` belongs to another container and
// ::cl::Error( CL_INVALID_ARG_INDEX ) if it lies beyond the current size.
1505 if( &index.m_Container !=
this )
1506 throw ::cl::Error( CL_INVALID_ARG_VALUE ,
"Iterator is not from this container" );
1508 if (index.m_Index > m_Size)
1509 throw ::cl::Error( CL_INVALID_ARG_INDEX ,
"Iterator is pointing past the end of this container" );
// Inserting at the end is handled separately (the statement performing it is not visible) and
// returns an iterator at the insertion index.
1511 if( index.m_Index == m_Size )
1514 return iterator( *
this, index.m_Index );
// Map the elements from the insertion point to the end, plus one extra slot for the new
// element.
1525 size_type sizeMap = (m_Size - index.m_Index) + 1;
1527 cl_int l_Error = CL_SUCCESS;
1528 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ | CL_MAP_WRITE,
1529 index.m_Index *
sizeof( value_type ), sizeMap *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1530 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
// Shift the existing elements up by one slot to open a gap at ptrBuff[ 0 ].
1533 ::memmove( ptrBuff + 1, ptrBuff, (sizeMap - 1)*
sizeof( value_type ) );
1538 ::cl::Event unmapEvent;
1539 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuff, NULL, &unmapEvent );
1540 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1541 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
1545 return iterator( *
this, index.m_Index );
// (Fill insert of `n` copies of `value` at `index`; the function header, any capacity growth and
// the size update are not visible in this excerpt.)
// Throws ::cl::Error( CL_INVALID_ARG_VALUE ) if `index` belongs to another container and
// ::cl::Error( CL_INVALID_ARG_INDEX ) if it lies beyond the current size.
1557 if( &index.m_Container !=
this )
1558 throw ::cl::Error( CL_INVALID_ARG_VALUE ,
"Iterator is not from this container" );
1560 if( index.m_Index > m_Size )
1561 throw ::cl::Error( CL_INVALID_ARG_INDEX ,
"Iterator is pointing past the end of this container" );
// Map the elements from the insertion point to the end, plus n extra slots.
1571 size_type sizeMap = (m_Size - index.m_Index) + n;
1573 cl_int l_Error = CL_SUCCESS;
1574 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ | CL_MAP_WRITE,
1575 index.m_Index *
sizeof( value_type ), sizeMap *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1576 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
// Shift the existing elements up by n slots, then write `value` into the gap.
1579 ::memmove( ptrBuff + n, ptrBuff, (sizeMap - n)*
sizeof( value_type ) );
1582 for( size_type i = 0; i < n; ++i )
1584 ptrBuff[ i ] = value;
1587 ::cl::Event unmapEvent;
1588 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuff, NULL, &unmapEvent );
1589 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1590 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
// (Range insert of [begin, end) at `index`; the function signature, any capacity growth and the
// size update are not visible in this excerpt.)
// Throws ::cl::Error( CL_INVALID_ARG_VALUE ) if `index` belongs to another container and
// ::cl::Error( CL_INVALID_ARG_INDEX ) if it lies beyond the current size.
1595 template<
typename InputIterator >
1598 if( &index.m_Container !=
this )
1599 throw ::cl::Error( CL_INVALID_ARG_VALUE ,
"Iterator is not from this container" );
1601 if ( index.m_Index > m_Size)
1602 throw ::cl::Error( CL_INVALID_ARG_INDEX ,
"Iterator is pointing past the end of this container" );
// Number of elements to insert.
1607 size_type n =
static_cast< size_type
>( std::distance( begin, end ) );
// Map the elements from the insertion point to the end, plus n extra slots.
1612 size_type sizeMap = (m_Size - index.m_Index) + n;
1614 cl_int l_Error = CL_SUCCESS;
1615 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ | CL_MAP_WRITE,
1616 index.m_Index *
sizeof( value_type ), sizeMap *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1617 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for iterator insert" );
// Shift the existing elements up by n slots, then copy the input range into the gap.
1620 ::memmove( ptrBuff + n, ptrBuff, (sizeMap - n)*
sizeof( value_type ) );
1623 std::copy( begin, end, stdext::checked_array_iterator< naked_pointer >( ptrBuff, n ) );
1628 ::cl::Event unmapEvent;
1629 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuff, NULL, &unmapEvent );
1630 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1631 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
// Fills the vector with copies of `value`.
//   newSize - requested element count.
//   value   - value to store.
// NOTE(review): the action taken when newSize > m_Size, several call arguments and the branch
// structure are not visible in this excerpt.
1643 void assign( size_type newSize,
const value_type& value )
1645 if( newSize > m_Size )
1651 cl_int l_Error = CL_SUCCESS;
1653 ::cl::Event fillEvent;
1654 size_t sizeDS =
sizeof(value_type);
// Device-side fill is used when sizeof( value_type ) is a power of two.
1656 if( !( sizeDS & (sizeDS - 1 ) ) )
1658 l_Error = m_commQueue.enqueueFillBuffer< value_type >( m_devMemory,
1661 m_Size *
sizeof( value_type ),
1664 V_OPENCL( l_Error,
"device_vector failed to fill the new data with the provided pattern" );
// Otherwise (presumably the else branch) the buffer is mapped to the host, filled with
// std::fill_n and unmapped.
1669 ::cl::Event fill_mapEvent;
1670 value_type *host_buffer = ( value_type* )m_commQueue.enqueueMapBuffer ( m_devMemory,
1672 CL_MAP_READ | CL_MAP_WRITE,
1674 sizeof( value_type )*newSize,
1679 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
1680 fill_mapEvent.wait( );
1686 std::fill_n( stdext::checked_array_iterator< naked_pointer >( host_buffer,newSize),
1698 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory,
1702 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
// Wait for the fill (or the unmap) to complete.
1706 l_Error = fillEvent.wait( );
1707 V_OPENCL( l_Error,
"device_vector failed to wait for fill event" );
// Replaces the vector's contents with the elements of [begin, end). The overload is disabled
// for integral InputIterator types so it does not compete with assign( size_type, value ).
// NOTE(review): the action taken when l_Count > m_Size is not visible in this excerpt.
1715 template<
typename InputIterator>
1716 typename std::enable_if< !std::is_integral<InputIterator>::value,
void>::type
1717 assign( InputIterator begin, InputIterator end )
// Number of source elements.
1719 size_type l_Count =
static_cast< size_type
>( std::distance( begin, end ) );
1721 if( l_Count > m_Size )
1727 cl_int l_Error = CL_SUCCESS;
// Blocking map of the first m_Size elements; CL_MAP_WRITE_INVALIDATE_REGION is requested.
1729 naked_pointer ptrBuffer =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0 , m_Size *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1730 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for push_back" );
// Copy the source range into the mapped region, then unmap and wait.
1733 std::copy( begin, end, stdext::checked_array_iterator< naked_pointer >( ptrBuffer, m_Size ) );
1737 ::cl::Event unmapEvent;
1738 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuffer, NULL, &unmapEvent );
1739 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1740 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
// OpenCL buffer object that holds the vector's elements.
1767 ::cl::Buffer m_devMemory;
// Command queue on which the copy, fill, map and unmap operations of this class are enqueued.
1768 ::cl::CommandQueue m_commQueue;
// Flags the buffer is created with; also consulted for CL_MEM_USE_HOST_PTR.
1770 cl_mem_flags m_Flags;
// Device-side (kernel source) declaration of bolt::cl::device_vector<T>::iterator, kept as a
// string and wrapped in a BOLT_CL_DEVICE_ITERATOR include guard. The kernel-side iterator holds
// a start index and a global pointer: operator[] addresses m_Ptr[ m_StartIndex + threadID ] and
// operator* addresses the element at m_StartIndex itself.
// NOTE(review): several lines of the template (and the construct that turns the middle section
// into a string) are not visible in this excerpt.
1774 static std::string deviceVectorIteratorTemplate =
1775 std::string (
"#if !defined(BOLT_CL_DEVICE_ITERATOR) \n#define BOLT_CL_DEVICE_ITERATOR \n") +
1778 namespace bolt {
namespace cl { \n
1779 template<
typename T > \n
1780 class device_vector \n
1786 typedef int iterator_category;
1787 typedef T value_type; \n
1788 typedef T base_type; \n
1789 typedef int difference_type; \n
1790 typedef int size_type; \n
1791 typedef T* pointer; \n
1792 typedef T& reference; \n
1794 iterator( value_type init ): m_StartIndex( init ), m_Ptr( 0 ) \n
1797 void init( global value_type* ptr )\n
1802 global value_type& operator[]( size_type threadID ) const \n
1804 return m_Ptr[ m_StartIndex + threadID ]; \n
1807 value_type operator*( ) const \n
1809 return m_Ptr[ m_StartIndex ]; \n
1812 size_type m_StartIndex; \n
1813 global value_type* m_Ptr; \n
1818 std::string (
"#endif \n");