Skip to content

[SYCL] Reuse user ptr in buffer & fix default memory allocation #243

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions sycl/include/CL/sycl/accessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,11 +336,11 @@ class accessor :
using reference = DataT &;
using const_reference = const DataT &;

template <int Dims = Dimensions>
template <typename AllocatorT, int Dims = Dimensions>
accessor(
enable_if_t<Dims == 0 && ((!IsPlaceH && IsHostBuf) ||
(IsPlaceH && (IsGlobalBuf || IsConstantBuf))),
buffer<DataT, 1>> &BufferRef)
buffer<DataT, 1, AllocatorT>> &BufferRef)
#ifdef __SYCL_DEVICE_ONLY__
: impl(id<AdjustedDim>(), BufferRef.get_range(), BufferRef.MemRange) {
#else
Expand All @@ -357,9 +357,9 @@ class accessor :
#endif
}

template <int Dims = Dimensions>
template <typename AllocatorT, int Dims = Dimensions>
accessor(
buffer<DataT, 1> &BufferRef,
buffer<DataT, 1, AllocatorT> &BufferRef,
enable_if_t<Dims == 0 && (!IsPlaceH && (IsGlobalBuf || IsConstantBuf)),
handler> &CommandGroupHandler)
#ifdef __SYCL_DEVICE_ONLY__
Expand All @@ -376,11 +376,11 @@ class accessor :
}
#endif

template <int Dims = Dimensions,
template <typename AllocatorT, int Dims = Dimensions,
typename = enable_if_t<
(Dims > 0) && ((!IsPlaceH && IsHostBuf) ||
(IsPlaceH && (IsGlobalBuf || IsConstantBuf)))>>
accessor(buffer<DataT, Dimensions> &BufferRef)
accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef)
#ifdef __SYCL_DEVICE_ONLY__
: impl(id<Dimensions>(), BufferRef.get_range(), BufferRef.MemRange) {
}
Expand All @@ -398,10 +398,11 @@ class accessor :
}
#endif

template <int Dims = Dimensions,
template <typename AllocatorT, int Dims = Dimensions,
typename = enable_if_t<
(Dims > 0) && (!IsPlaceH && (IsGlobalBuf || IsConstantBuf))>>
accessor(buffer<DataT, Dimensions> &BufferRef, handler &CommandGroupHandler)
accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef,
handler &CommandGroupHandler)
#ifdef __SYCL_DEVICE_ONLY__
: impl(id<AdjustedDim>(), BufferRef.get_range(), BufferRef.MemRange) {
}
Expand All @@ -416,12 +417,12 @@ class accessor :
}
#endif

template <int Dims = Dimensions,
template <typename AllocatorT, int Dims = Dimensions,
typename = enable_if_t<
(Dims > 0) && ((!IsPlaceH && IsHostBuf) ||
(IsPlaceH && (IsGlobalBuf || IsConstantBuf)))>>
accessor(buffer<DataT, Dimensions> &BufferRef, range<Dimensions> AccessRange,
id<Dimensions> AccessOffset = {})
accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef,
range<Dimensions> AccessRange, id<Dimensions> AccessOffset = {})
#ifdef __SYCL_DEVICE_ONLY__
: impl(AccessOffset, AccessRange, BufferRef.MemRange) {
}
Expand All @@ -438,11 +439,12 @@ class accessor :
}
#endif

template <int Dims = Dimensions,
template <typename AllocatorT, int Dims = Dimensions,
typename = enable_if_t<
(Dims > 0) && (!IsPlaceH && (IsGlobalBuf || IsConstantBuf))>>
accessor(buffer<DataT, Dimensions> &BufferRef, handler &CommandGroupHandler,
range<Dimensions> AccessRange, id<Dimensions> AccessOffset = {})
accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef,
handler &CommandGroupHandler, range<Dimensions> AccessRange,
id<Dimensions> AccessOffset = {})
#ifdef __SYCL_DEVICE_ONLY__
: impl(AccessOffset, AccessRange, BufferRef.MemRange) {
}
Expand Down
2 changes: 1 addition & 1 deletion sycl/include/CL/sycl/buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class queue;
template <int dimensions> class range;

template <typename T, int dimensions = 1,
typename AllocatorT = cl::sycl::buffer_allocator>
typename AllocatorT = cl::sycl::detail::aligned_allocator<T>>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

align with specification

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

align with specification

Spec is incorrect at this point. buffer_allocator must be a template class for various of reasons, but it's not. The issue will be created, but for now I would stay with the approach made by Sergey.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, what I mean, I would approve the change unless Alexey hasn't strong objections of doing so.

Copy link
Contributor

@bader bader Jun 25, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No objections to approve the patch from me.

class buffer {
public:
using value_type = T;
Expand Down
22 changes: 13 additions & 9 deletions sycl/include/CL/sycl/detail/aligned_allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,20 @@

#include <CL/cl.h>
#include <CL/sycl/detail/cnri.h>
#include <CL/sycl/detail/common.hpp>
#include <CL/sycl/detail/os_util.hpp>
#include <CL/sycl/range.hpp>

#include <algorithm>
#include <cstring>
#include <cstdlib>
#include <memory>
#include <vector>

namespace cl {
namespace sycl {
template <typename T, size_t Alignment>
class aligned_allocator {
namespace detail {
template <typename T> class aligned_allocator {
public:
using value_type = T;
using pointer = T*;
Expand All @@ -30,10 +32,7 @@ class aligned_allocator {
using const_reference = const T&;

public:
template<typename U>
struct rebind {
typedef aligned_allocator<U, Alignment> other;
};
template <typename U> struct rebind { typedef aligned_allocator<U> other; };

// Construct an object
void construct(pointer Ptr, const_reference Val) {
Expand All @@ -46,11 +45,15 @@ class aligned_allocator {
pointer address(reference Val) const { return &Val; }
const_pointer address(const_reference Val) { return &Val; }

// Allocate aligned (to Alignment) memory
// Allocate sufficiently aligned memory
pointer allocate(size_t Size) {
Size += Alignment - Size % Alignment;
size_t NumBytes = Size * sizeof(value_type);
const size_t Alignment =
std::max<size_t>(getNextPowerOfTwo(sizeof(value_type)), 64);
NumBytes = ((NumBytes - 1) | (Alignment - 1)) + 1;

pointer Result = reinterpret_cast<pointer>(
detail::OSUtil::alignedAlloc(Alignment, Size * sizeof(value_type)));
detail::OSUtil::alignedAlloc(Alignment, NumBytes));
if (!Result)
throw std::bad_alloc();
return Result;
Expand All @@ -65,5 +68,6 @@ class aligned_allocator {
bool operator==(const aligned_allocator&) { return true; }
bool operator!=(const aligned_allocator& rhs) { return false; }
};
} // namespace detail
} // namespace sycl
} // namespace cl
8 changes: 6 additions & 2 deletions sycl/include/CL/sycl/detail/buffer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <CL/sycl/stl.hpp>
#include <CL/sycl/types.hpp>

#include <cstdint>
#include <functional>
#include <memory>
#include <type_traits>
Expand All @@ -35,7 +36,7 @@ class accessor;
template <typename T, int Dimensions, typename AllocatorT> class buffer;
class handler;

using buffer_allocator = aligned_allocator<char, /*Alignment*/64>;
using buffer_allocator = detail::aligned_allocator<char>;

namespace detail {
using EventImplPtr = std::shared_ptr<detail::event_impl>;
Expand All @@ -59,7 +60,10 @@ template <typename AllocatorT> class buffer_impl : public SYCLMemObjT {
return;

set_final_data(reinterpret_cast<char *>(HostData));
if (MProps.has_property<property::buffer::use_host_ptr>()) {
size_t RequiredAlignment =
getNextPowerOfTwo(sizeof(typename AllocatorT::value_type));
if (reinterpret_cast<std::uintptr_t>(HostData) % RequiredAlignment == 0 ||
MProps.has_property<property::buffer::use_host_ptr>()) {
MUserPtr = HostData;
return;
}
Expand Down
3 changes: 3 additions & 0 deletions sycl/include/CL/sycl/detail/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ template <class T> T createSyclObjFromImpl(decltype(T::impl) ImplObj) {
return T(ImplObj);
}

// Returns the smallest power of two not less than Var
size_t getNextPowerOfTwo(size_t Var);

} // namespace detail
} // namespace sycl
} // namespace cl
2 changes: 1 addition & 1 deletion sycl/include/CL/sycl/detail/image_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ enum class image_channel_type : unsigned int;
namespace detail {

// utility functions and typedefs for image_impl
using image_allocator = aligned_allocator<byte, /*alignment*/ 64>;
using image_allocator = aligned_allocator<byte>;

// utility function: Returns the Number of Channels for a given Order.
uint8_t getImageNumberChannels(image_channel_order Order);
Expand Down
11 changes: 11 additions & 0 deletions sycl/source/detail/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,17 @@ vector_class<string_class> split_string(const string_class &str,
return result;
}

size_t getNextPowerOfTwo(size_t Var) {
--Var;
Var |= Var >> 1;
Var |= Var >> 2;
Var |= Var >> 4;
Var |= Var >> 8;
Var |= Var >> 16;
Var |= Var >> 32;
return ++Var;
}

} // namespace detail
} // namespace sycl
} // namespace cl