diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 454e13a00decd..afdcfa1d6d60e 100644 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -336,11 +336,11 @@ class accessor : using reference = DataT &; using const_reference = const DataT &; - template + template accessor( enable_if_t> &BufferRef) + buffer> &BufferRef) #ifdef __SYCL_DEVICE_ONLY__ : impl(id(), BufferRef.get_range(), BufferRef.MemRange) { #else @@ -357,9 +357,9 @@ class accessor : #endif } - template + template accessor( - buffer &BufferRef, + buffer &BufferRef, enable_if_t &CommandGroupHandler) #ifdef __SYCL_DEVICE_ONLY__ @@ -376,11 +376,11 @@ class accessor : } #endif - template 0) && ((!IsPlaceH && IsHostBuf) || (IsPlaceH && (IsGlobalBuf || IsConstantBuf)))>> - accessor(buffer &BufferRef) + accessor(buffer &BufferRef) #ifdef __SYCL_DEVICE_ONLY__ : impl(id(), BufferRef.get_range(), BufferRef.MemRange) { } @@ -398,10 +398,11 @@ class accessor : } #endif - template 0) && (!IsPlaceH && (IsGlobalBuf || IsConstantBuf))>> - accessor(buffer &BufferRef, handler &CommandGroupHandler) + accessor(buffer &BufferRef, + handler &CommandGroupHandler) #ifdef __SYCL_DEVICE_ONLY__ : impl(id(), BufferRef.get_range(), BufferRef.MemRange) { } @@ -416,12 +417,12 @@ class accessor : } #endif - template 0) && ((!IsPlaceH && IsHostBuf) || (IsPlaceH && (IsGlobalBuf || IsConstantBuf)))>> - accessor(buffer &BufferRef, range AccessRange, - id AccessOffset = {}) + accessor(buffer &BufferRef, + range AccessRange, id AccessOffset = {}) #ifdef __SYCL_DEVICE_ONLY__ : impl(AccessOffset, AccessRange, BufferRef.MemRange) { } @@ -438,11 +439,12 @@ class accessor : } #endif - template 0) && (!IsPlaceH && (IsGlobalBuf || IsConstantBuf))>> - accessor(buffer &BufferRef, handler &CommandGroupHandler, - range AccessRange, id AccessOffset = {}) + accessor(buffer &BufferRef, + handler &CommandGroupHandler, range AccessRange, + id AccessOffset = {}) #ifdef __SYCL_DEVICE_ONLY__ : impl(AccessOffset, AccessRange, BufferRef.MemRange) { } diff --git a/sycl/include/CL/sycl/buffer.hpp b/sycl/include/CL/sycl/buffer.hpp index d9f75d520d238..f150a60a341f2 100644 --- a/sycl/include/CL/sycl/buffer.hpp +++ b/sycl/include/CL/sycl/buffer.hpp @@ -21,7 +21,7 @@ class queue; template class range; template + typename AllocatorT = cl::sycl::detail::aligned_allocator> class buffer { public: using value_type = T; diff --git a/sycl/include/CL/sycl/detail/aligned_allocator.hpp b/sycl/include/CL/sycl/detail/aligned_allocator.hpp index d99dd9dba166f..a7925aac63338 100644 --- a/sycl/include/CL/sycl/detail/aligned_allocator.hpp +++ b/sycl/include/CL/sycl/detail/aligned_allocator.hpp @@ -10,9 +10,11 @@ #include #include +#include #include #include +#include #include #include #include @@ -20,8 +22,8 @@ namespace cl { namespace sycl { -template -class aligned_allocator { +namespace detail { +template class aligned_allocator { public: using value_type = T; using pointer = T*; @@ -30,10 +32,7 @@ class aligned_allocator { using const_reference = const T&; public: - template - struct rebind { - typedef aligned_allocator other; - }; + template struct rebind { typedef aligned_allocator other; }; // Construct an object void construct(pointer Ptr, const_reference Val) { @@ -46,11 +45,15 @@ class aligned_allocator { pointer address(reference Val) const { return &Val; } const_pointer address(const_reference Val) { return &Val; } - // Allocate aligned (to Alignment) memory + // Allocate sufficiently aligned memory pointer allocate(size_t Size) { - Size += Alignment - Size % Alignment; + size_t NumBytes = Size * sizeof(value_type); + const size_t Alignment = + std::max(getNextPowerOfTwo(sizeof(value_type)), 64); + NumBytes = ((NumBytes - 1) | (Alignment - 1)) + 1; + pointer Result = reinterpret_cast( - detail::OSUtil::alignedAlloc(Alignment, Size * sizeof(value_type))); + detail::OSUtil::alignedAlloc(Alignment, NumBytes)); if (!Result) throw std::bad_alloc(); return Result; @@ -65,5 +68,6 @@ class aligned_allocator { bool operator==(const aligned_allocator&) { return true; } bool operator!=(const aligned_allocator& rhs) { return false; } }; +} // namespace detail } // namespace sycl } // namespace cl diff --git a/sycl/include/CL/sycl/detail/buffer_impl.hpp b/sycl/include/CL/sycl/detail/buffer_impl.hpp index bd7ad79bd012f..c0765fa00d48a 100644 --- a/sycl/include/CL/sycl/detail/buffer_impl.hpp +++ b/sycl/include/CL/sycl/detail/buffer_impl.hpp @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -35,7 +36,7 @@ class accessor; template class buffer; class handler; -using buffer_allocator = aligned_allocator; +using buffer_allocator = detail::aligned_allocator; namespace detail { using EventImplPtr = std::shared_ptr; @@ -59,7 +60,10 @@ template class buffer_impl : public SYCLMemObjT { return; set_final_data(reinterpret_cast(HostData)); - if (MProps.has_property()) { + size_t RequiredAlignment = + getNextPowerOfTwo(sizeof(typename AllocatorT::value_type)); + if (reinterpret_cast(HostData) % RequiredAlignment == 0 || + MProps.has_property()) { MUserPtr = HostData; return; } diff --git a/sycl/include/CL/sycl/detail/common.hpp b/sycl/include/CL/sycl/detail/common.hpp index 4031e138f8d47..42a522d59c9eb 100644 --- a/sycl/include/CL/sycl/detail/common.hpp +++ b/sycl/include/CL/sycl/detail/common.hpp @@ -103,6 +103,9 @@ template T createSyclObjFromImpl(decltype(T::impl) ImplObj) { return T(ImplObj); } +// Returns the smallest power of two not less than Var +size_t getNextPowerOfTwo(size_t Var); + } // namespace detail } // namespace sycl } // namespace cl diff --git a/sycl/include/CL/sycl/detail/image_impl.hpp b/sycl/include/CL/sycl/detail/image_impl.hpp index 51a11639081f2..65617cd4e8e28 100644 --- a/sycl/include/CL/sycl/detail/image_impl.hpp +++ b/sycl/include/CL/sycl/detail/image_impl.hpp @@ -26,7 +26,7 @@ enum class image_channel_type : unsigned int; namespace detail { // utility functions and typedefs for image_impl -using image_allocator = aligned_allocator; +using image_allocator = aligned_allocator; // utility function: Returns the Number of Channels for a given Order. uint8_t getImageNumberChannels(image_channel_order Order); diff --git a/sycl/source/detail/common.cpp b/sycl/source/detail/common.cpp index d9841251aa195..25379b367945a 100644 --- a/sycl/source/detail/common.cpp +++ b/sycl/source/detail/common.cpp @@ -237,6 +237,17 @@ vector_class split_string(const string_class &str, return result; } +size_t getNextPowerOfTwo(size_t Var) { + --Var; + Var |= Var >> 1; + Var |= Var >> 2; + Var |= Var >> 4; + Var |= Var >> 8; + Var |= Var >> 16; + Var |= Var >> 32; + return ++Var; +} + } // namespace detail } // namespace sycl } // namespace cl