diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index f24583c5889e9..dfd68505274f2 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -292,7 +292,8 @@ typedef enum { PI_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE = 0x10025, PI_DEVICE_INFO_MAX_MEM_BANDWIDTH = 0x10026, PI_DEVICE_INFO_IMAGE_SRGB = 0x10027, - PI_DEVICE_INFO_ATOMIC_64 = 0x10110 + PI_DEVICE_INFO_ATOMIC_64 = 0x10110, + PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111 } _pi_device_info; typedef enum { @@ -312,6 +313,8 @@ typedef enum { PI_CONTEXT_INFO_NUM_DEVICES = CL_CONTEXT_NUM_DEVICES, PI_CONTEXT_INFO_PROPERTIES = CL_CONTEXT_PROPERTIES, PI_CONTEXT_INFO_REFERENCE_COUNT = CL_CONTEXT_REFERENCE_COUNT, + // Atomics capabilities extensions + PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10010 } _pi_context_info; typedef enum { @@ -509,6 +512,13 @@ constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_ADDRESSING_MODE = constexpr pi_sampler_properties PI_SAMPLER_PROPERTIES_FILTER_MODE = CL_SAMPLER_FILTER_MODE; +using pi_memory_order_capabilities = pi_bitfield; +constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELAXED = 0x01; +constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQUIRE = 0x02; +constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_RELEASE = 0x04; +constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_ACQ_REL = 0x08; +constexpr pi_memory_order_capabilities PI_MEMORY_ORDER_SEQ_CST = 0x10; + typedef enum { PI_PROFILING_INFO_COMMAND_QUEUED = CL_PROFILING_COMMAND_QUEUED, PI_PROFILING_INFO_COMMAND_SUBMIT = CL_PROFILING_COMMAND_SUBMIT, diff --git a/sycl/include/CL/sycl/detail/spirv.hpp b/sycl/include/CL/sycl/detail/spirv.hpp index 6294bf89d928f..a326a72d480dd 100644 --- a/sycl/include/CL/sycl/detail/spirv.hpp +++ b/sycl/include/CL/sycl/detail/spirv.hpp @@ -224,11 +224,10 @@ EnableIfGenericBroadcast GroupBroadcast(T x, id local_id) { // Single happens-before means semantics should always apply to all spaces // Although consume is unsupported, forwarding to acquire is valid template -static inline constexpr typename std::enable_if< - std::is_same::value || - std::is_same::value, - __spv::MemorySemanticsMask::Flag>::type -getMemorySemanticsMask(T Order) { +static inline constexpr + typename std::enable_if::value, + __spv::MemorySemanticsMask::Flag>::type + getMemorySemanticsMask(T Order) { __spv::MemorySemanticsMask::Flag SpvOrder = __spv::MemorySemanticsMask::None; switch (Order) { case T::relaxed: @@ -254,28 +253,25 @@ getMemorySemanticsMask(T Order) { __spv::MemorySemanticsMask::CrossWorkgroupMemory); } -static inline constexpr __spv::Scope::Flag -getScope(ext::oneapi::memory_scope Scope) { +static inline constexpr __spv::Scope::Flag getScope(memory_scope Scope) { switch (Scope) { - case ext::oneapi::memory_scope::work_item: + case memory_scope::work_item: return __spv::Scope::Invocation; - case ext::oneapi::memory_scope::sub_group: + case memory_scope::sub_group: return __spv::Scope::Subgroup; - case ext::oneapi::memory_scope::work_group: + case memory_scope::work_group: return __spv::Scope::Workgroup; - case ext::oneapi::memory_scope::device: + case memory_scope::device: return __spv::Scope::Device; - case ext::oneapi::memory_scope::system: + case memory_scope::system: return __spv::Scope::CrossDevice; } } template inline typename detail::enable_if_t::value, T> -AtomicCompareExchange(multi_ptr MPtr, - ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Success, - ext::oneapi::memory_order Failure, T Desired, +AtomicCompareExchange(multi_ptr MPtr, memory_scope Scope, + memory_order Success, memory_order Failure, T Desired, T Expected) { auto SPIRVSuccess = getMemorySemanticsMask(Success); auto SPIRVFailure = getMemorySemanticsMask(Failure); @@ -287,10 +283,8 @@ AtomicCompareExchange(multi_ptr MPtr, template inline typename detail::enable_if_t::value, T> -AtomicCompareExchange(multi_ptr MPtr, - ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Success, - ext::oneapi::memory_order Failure, T Desired, +AtomicCompareExchange(multi_ptr MPtr, memory_scope Scope, + memory_order Success, memory_order Failure, T Desired, T Expected) { using I = detail::make_unsinged_integer_t; auto SPIRVSuccess = getMemorySemanticsMask(Success); @@ -308,8 +302,8 @@ AtomicCompareExchange(multi_ptr MPtr, template inline typename detail::enable_if_t::value, T> -AtomicLoad(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order) { +AtomicLoad(multi_ptr MPtr, memory_scope Scope, + memory_order Order) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -318,8 +312,8 @@ AtomicLoad(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicLoad(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order) { +AtomicLoad(multi_ptr MPtr, memory_scope Scope, + memory_order Order) { using I = detail::make_unsinged_integer_t; auto *PtrInt = reinterpret_cast::pointer_t>( @@ -332,8 +326,8 @@ AtomicLoad(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value> -AtomicStore(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicStore(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -342,8 +336,8 @@ AtomicStore(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value> -AtomicStore(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicStore(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { using I = detail::make_unsinged_integer_t; auto *PtrInt = reinterpret_cast::pointer_t>( @@ -356,8 +350,8 @@ AtomicStore(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicExchange(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicExchange(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -366,8 +360,8 @@ AtomicExchange(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicExchange(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicExchange(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { using I = detail::make_unsinged_integer_t; auto *PtrInt = reinterpret_cast::pointer_t>( @@ -382,8 +376,8 @@ AtomicExchange(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicIAdd(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicIAdd(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -392,8 +386,8 @@ AtomicIAdd(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicISub(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicISub(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -402,8 +396,8 @@ AtomicISub(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicFAdd(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicFAdd(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -412,8 +406,8 @@ AtomicFAdd(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicAnd(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicAnd(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -422,8 +416,8 @@ AtomicAnd(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicOr(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicOr(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -432,8 +426,8 @@ AtomicOr(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicXor(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicXor(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -442,8 +436,8 @@ AtomicXor(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicMin(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicMin(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -452,8 +446,8 @@ AtomicMin(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicMin(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicMin(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -462,8 +456,8 @@ AtomicMin(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicMax(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicMax(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); @@ -472,8 +466,8 @@ AtomicMax(multi_ptr MPtr, ext::oneapi::memory_scope Scope, template inline typename detail::enable_if_t::value, T> -AtomicMax(multi_ptr MPtr, ext::oneapi::memory_scope Scope, - ext::oneapi::memory_order Order, T Value) { +AtomicMax(multi_ptr MPtr, memory_scope Scope, + memory_order Order, T Value) { auto *Ptr = MPtr.get(); auto SPIRVOrder = getMemorySemanticsMask(Order); auto SPIRVScope = getScope(Scope); diff --git a/sycl/include/CL/sycl/info/context_traits.def b/sycl/include/CL/sycl/info/context_traits.def index 136bc3dd9f58e..61daa39233b58 100644 --- a/sycl/include/CL/sycl/info/context_traits.def +++ b/sycl/include/CL/sycl/info/context_traits.def @@ -1,3 +1,4 @@ __SYCL_PARAM_TRAITS_SPEC(context, reference_count, cl_uint) __SYCL_PARAM_TRAITS_SPEC(context, platform, cl::sycl::platform) __SYCL_PARAM_TRAITS_SPEC(context, devices, std::vector) +__SYCL_PARAM_TRAITS_SPEC(context, atomic_memory_order_capabilities, std::vector) diff --git a/sycl/include/CL/sycl/info/device_traits.def b/sycl/include/CL/sycl/info/device_traits.def index e14e62ea93b9b..208d06f8b31ca 100644 --- a/sycl/include/CL/sycl/info/device_traits.def +++ b/sycl/include/CL/sycl/info/device_traits.def @@ -23,6 +23,8 @@ __SYCL_PARAM_TRAITS_SPEC(device, address_bits, pi_uint32) __SYCL_PARAM_TRAITS_SPEC(device, max_mem_alloc_size, pi_uint64) __SYCL_PARAM_TRAITS_SPEC(device, image_support, bool) __SYCL_PARAM_TRAITS_SPEC(device, atomic64, bool) +__SYCL_PARAM_TRAITS_SPEC(device, atomic_memory_order_capabilities, + std::vector) __SYCL_PARAM_TRAITS_SPEC(device, max_read_image_args, pi_uint32) __SYCL_PARAM_TRAITS_SPEC(device, max_write_image_args, pi_uint32) __SYCL_PARAM_TRAITS_SPEC(device, image2d_max_width, size_t) diff --git a/sycl/include/CL/sycl/info/info_desc.hpp b/sycl/include/CL/sycl/info/info_desc.hpp index ce7c883f0ab41..2e7ad37c7547d 100644 --- a/sycl/include/CL/sycl/info/info_desc.hpp +++ b/sycl/include/CL/sycl/info/info_desc.hpp @@ -37,6 +37,8 @@ enum class context : cl_context_info { reference_count = CL_CONTEXT_REFERENCE_COUNT, platform = CL_CONTEXT_PLATFORM, devices = CL_CONTEXT_DEVICES, + atomic_memory_order_capabilities = + PI_CONTEXT_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, }; // A.3 Device information descriptors @@ -149,7 +151,9 @@ enum class device : cl_device_info { ext_intel_mem_channel = PI_MEM_PROPERTIES_CHANNEL, ext_oneapi_srgb = PI_DEVICE_INFO_IMAGE_SRGB, ext_intel_device_info_uuid = PI_DEVICE_INFO_UUID, - atomic64 = PI_DEVICE_INFO_ATOMIC_64 + atomic64 = PI_DEVICE_INFO_ATOMIC_64, + atomic_memory_order_capabilities = + PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES, }; enum class device_type : pi_uint64 { diff --git a/sycl/include/CL/sycl/memory_enums.hpp b/sycl/include/CL/sycl/memory_enums.hpp index 22faec62bc67c..e552268b4c486 100644 --- a/sycl/include/CL/sycl/memory_enums.hpp +++ b/sycl/include/CL/sycl/memory_enums.hpp @@ -8,11 +8,8 @@ #pragma once -#include - __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { -using ext::oneapi::memory_scope; enum class memory_order : int { relaxed = 0, @@ -24,6 +21,14 @@ enum class memory_order : int { seq_cst = 5 }; +enum class memory_scope : int { + work_item = 0, + sub_group = 1, + work_group = 2, + device = 3, + system = 4 +}; + #if __cplusplus >= 201703L inline constexpr auto memory_scope_work_item = memory_scope::work_item; inline constexpr auto memory_scope_sub_group = memory_scope::sub_group; @@ -38,9 +43,25 @@ inline constexpr auto memory_order_acq_rel = memory_order::acq_rel; inline constexpr auto memory_order_seq_cst = memory_order::seq_cst; #endif -#ifndef __SYCL_DEVICE_ONLY__ namespace detail { +inline std::vector +readMemoryOrderBitfield(pi_memory_order_capabilities bits) { + std::vector result; + if (bits & PI_MEMORY_ORDER_RELAXED) + result.push_back(memory_order::relaxed); + if (bits & PI_MEMORY_ORDER_ACQUIRE) + result.push_back(memory_order::acquire); + if (bits & PI_MEMORY_ORDER_RELEASE) + result.push_back(memory_order::release); + if (bits & PI_MEMORY_ORDER_ACQ_REL) + result.push_back(memory_order::acq_rel); + if (bits & PI_MEMORY_ORDER_SEQ_CST) + result.push_back(memory_order::seq_cst); + return result; +} + +#ifndef __SYCL_DEVICE_ONLY__ static constexpr std::memory_order getStdMemoryOrder(sycl::memory_order order) { switch (order) { case memory_order::relaxed: @@ -57,8 +78,8 @@ static constexpr std::memory_order getStdMemoryOrder(sycl::memory_order order) { return std::memory_order_seq_cst; } } +#endif // __SYCL_DEVICE_ONLY__ } // namespace detail -#endif // __SYCL_DEVICE_ONLY__ } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/sycl/ext/oneapi/atomic_enums.hpp b/sycl/include/sycl/ext/oneapi/atomic_enums.hpp index ffbeacd415267..487251b0fe128 100644 --- a/sycl/include/sycl/ext/oneapi/atomic_enums.hpp +++ b/sycl/include/sycl/ext/oneapi/atomic_enums.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #ifndef __SYCL_DEVICE_ONLY__ #include @@ -23,15 +24,7 @@ namespace sycl { namespace ext { namespace oneapi { -enum class memory_order : int { - relaxed = 0, - acquire = 1, - __consume_unsupported = - 2, // helps optimizer when mapping to std::memory_order - release = 3, - acq_rel = 4, - seq_cst = 5 -}; +using memory_order = cl::sycl::memory_order; __SYCL_INLINE_CONSTEXPR memory_order memory_order_relaxed = memory_order::relaxed; __SYCL_INLINE_CONSTEXPR memory_order memory_order_acquire = @@ -43,13 +36,7 @@ __SYCL_INLINE_CONSTEXPR memory_order memory_order_acq_rel = __SYCL_INLINE_CONSTEXPR memory_order memory_order_seq_cst = memory_order::seq_cst; -enum class memory_scope : int { - work_item = 0, - sub_group = 1, - work_group = 2, - device = 3, - system = 4 -}; +using memory_scope = cl::sycl::memory_scope; __SYCL_INLINE_CONSTEXPR memory_scope memory_scope_work_item = memory_scope::work_item; __SYCL_INLINE_CONSTEXPR memory_scope memory_scope_sub_group = diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index a217bff521d1a..a94e9708cdcbd 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -1039,7 +1039,13 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, atomic64); } - + case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: { + // NVPTX currently only support at most monotonic atomic load/store. + // Acquire and release is present in newer PTX, but is not yet supported + // in LLVM NVPTX. + return getInfo(param_value_size, param_value, param_value_size_ret, + PI_MEMORY_ORDER_RELAXED); + } case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { // NVIDIA devices only support one sub-group size (the warp size) int warpSize = 0; diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 9d6e2bfd0ac6c..38b69d3eb382f 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -182,7 +182,9 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, // For details about Intel UUID extension, see // sycl/doc/extensions/IntelGPU/IntelGPUDeviceInfo.md case PI_DEVICE_INFO_UUID: + // TODO: Implement. case PI_DEVICE_INFO_ATOMIC_64: + case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: return PI_INVALID_VALUE; case PI_DEVICE_INFO_IMAGE_SRGB: return PI_SUCCESS; diff --git a/sycl/plugins/rocm/pi_rocm.cpp b/sycl/plugins/rocm/pi_rocm.cpp index 026c1fa8d5f10..86b5e44932010 100644 --- a/sycl/plugins/rocm/pi_rocm.cpp +++ b/sycl/plugins/rocm/pi_rocm.cpp @@ -1533,7 +1533,10 @@ pi_result rocm_piDeviceGetInfo(pi_device device, pi_device_info param_name, return getInfo(param_value_size, param_value, param_value_size_ret, value); } - // TODO: Investigate if this information is available on HIP. + // TODO: Implement. + case PI_DEVICE_INFO_ATOMIC_64: + case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: + // TODO: Investigate if this information is available on HIP. case PI_DEVICE_INFO_PCI_ADDRESS: case PI_DEVICE_INFO_GPU_EU_COUNT: case PI_DEVICE_INFO_GPU_EU_SIMD_WIDTH: diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 88b49a835743e..32f2ed4df6191 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -146,6 +146,23 @@ std::vector context_impl::get_info() const { return MDevices; } +template <> +std::vector +context_impl::get_info() + const { + if (is_host()) + return {cl::sycl::memory_order::relaxed, cl::sycl::memory_order::acquire, + cl::sycl::memory_order::release, cl::sycl::memory_order::acq_rel, + cl::sycl::memory_order::seq_cst}; + + pi_memory_order_capabilities Result; + getPlugin().call( + MContext, + pi::cast( + info::context::atomic_memory_order_capabilities), + sizeof(Result), &Result, nullptr); + return readMemoryOrderBitfield(Result); +} RT::PiContext &context_impl::getHandleRef() { return MContext; } const RT::PiContext &context_impl::getHandleRef() const { return MContext; } diff --git a/sycl/source/detail/context_info.hpp b/sycl/source/detail/context_info.hpp index cead0d2e09c2f..737c6bbb7a508 100644 --- a/sycl/source/detail/context_info.hpp +++ b/sycl/source/detail/context_info.hpp @@ -30,6 +30,24 @@ template struct get_context_info { } }; +// Specialization for atomic_memory_order_capabilities, PI returns a bitfield +template <> +struct get_context_info { + using RetType = typename info::param_traits< + info::context, + info::context::atomic_memory_order_capabilities>::return_type; + + static RetType get(RT::PiContext ctx, const plugin &Plugin) { + pi_memory_order_capabilities Result; + Plugin.call( + ctx, + pi::cast( + info::context::atomic_memory_order_capabilities), + sizeof(Result), &Result, nullptr); + return readMemoryOrderBitfield(Result); + } +}; + } // namespace detail } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index f947a284b9273..718a0919f906a 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -249,6 +250,21 @@ template <> struct get_device_info { } }; +// Specialization for atomic_memory_order_capabilities, PI returns a bitfield +template <> +struct get_device_info, + info::device::atomic_memory_order_capabilities> { + static std::vector get(RT::PiDevice dev, const plugin &Plugin) { + pi_memory_order_capabilities result; + Plugin.call_nocheck( + dev, + pi::cast( + info::device::atomic_memory_order_capabilities), + sizeof(pi_memory_order_capabilities), &result, nullptr); + return readMemoryOrderBitfield(result); + } +}; + // Specialization for exec_capabilities, OpenCL returns a bitfield template <> struct get_device_info, @@ -631,6 +647,13 @@ template <> inline bool get_device_info_host() { return false; } +template <> +inline std::vector +get_device_info_host() { + return {memory_order::relaxed, memory_order::acquire, memory_order::release, + memory_order::acq_rel, memory_order::seq_cst}; +} + template <> inline cl_uint get_device_info_host() { // current value is the required minimum diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 68c3b87ec2af9..fea2ec4b9bf2d 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4169,6 +4169,7 @@ _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65573EEENS3_12param_traitsIS4_XT_ _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65574EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65575EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65808EEENS3_12param_traitsIS4_XT_EE11return_typeEv +_ZNK2cl4sycl6device8get_infoILNS0_4info6deviceE65809EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl6device9getNativeEv _ZNK2cl4sycl6kernel11get_contextEv _ZNK2cl4sycl6kernel11get_programEv @@ -4236,6 +4237,7 @@ _ZNK2cl4sycl7context7is_hostEv _ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4224EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4225EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl7context8get_infoILNS0_4info7contextE4228EEENS3_12param_traitsIS4_XT_EE11return_typeEv +_ZNK2cl4sycl7context8get_infoILNS0_4info7contextE65552EEENS3_12param_traitsIS4_XT_EE11return_typeEv _ZNK2cl4sycl7context9getNativeEv _ZNK2cl4sycl7handler30getOrInsertHandlerKernelBundleEb _ZNK2cl4sycl7program10get_kernelENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE diff --git a/sycl/test/atomic_ref/accessor.cpp b/sycl/test/atomic_ref/accessor.cpp index 432fd14c3c70d..e0d765fe5b5e6 100644 --- a/sycl/test/atomic_ref/accessor.cpp +++ b/sycl/test/atomic_ref/accessor.cpp @@ -22,16 +22,15 @@ template void accessor_test(queue q, size_t N) { q.submit([&](handler &cgh) { #if __cplusplus > 201402L static_assert( - std::is_same< - decltype( - atomic_accessor(sum_buf, cgh, relaxed_order, device_scope)), - atomic_accessor>::value, + std::is_same>::value, "atomic_accessor type incorrectly deduced"); #endif auto sum = - atomic_accessor(sum_buf, cgh); + atomic_accessor( + sum_buf, cgh); auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { @@ -39,8 +38,7 @@ template void accessor_test(queue q, size_t N) { static_assert( std::is_same< decltype(sum[0]), - atomic_ref>::value, "atomic_accessor returns incorrect atomic_ref"); out[gid] = sum[0].fetch_add(T(1)); @@ -69,19 +67,19 @@ void local_accessor_test(queue q, size_t N, size_t L = 8) { { buffer output_buf(output.data(), output.size()); q.submit([&](handler &cgh) { - auto sum = atomic_accessor(1, cgh); + auto sum = + atomic_accessor(1, cgh); auto out = output_buf.template get_access(cgh); cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) { int grp = it.get_group(0); sum[0].store(0); it.barrier(); static_assert( - std::is_same>::value, + std::is_same< + decltype(sum[0]), + atomic_ref>::value, "local atomic_accessor returns incorrect atomic_ref"); T result = sum[0].fetch_add(T(1)); if (result == it.get_local_range(0) - 1) { diff --git a/sycl/test/atomic_ref/add.cpp b/sycl/test/atomic_ref/add.cpp index f075d8628cbb8..08d1c42588bc9 100644 --- a/sycl/test/atomic_ref/add.cpp +++ b/sycl/test/atomic_ref/add.cpp @@ -29,8 +29,7 @@ void add_fetch_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); out[gid] = atm.fetch_add(Difference(1)); }); @@ -65,8 +64,7 @@ void add_plus_equal_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); out[gid] = atm += Difference(1); }); @@ -101,8 +99,7 @@ void add_pre_inc_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); out[gid] = ++atm; }); @@ -137,8 +134,7 @@ void add_post_inc_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); out[gid] = atm++; }); diff --git a/sycl/test/atomic_ref/compare_exchange.cpp b/sycl/test/atomic_ref/compare_exchange.cpp index f3b0829af7916..977fa38a109f4 100644 --- a/sycl/test/atomic_ref/compare_exchange.cpp +++ b/sycl/test/atomic_ref/compare_exchange.cpp @@ -31,9 +31,9 @@ template void compare_exchange_test(queue q, size_t N) { cgh.parallel_for>( range<1>(N), [=](item<1> it) { size_t gid = it.get_id(0); - auto atm = atomic_ref(exc[0]); + auto atm = + atomic_ref(exc[0]); T result = T(N); // Avoid copying pointer bool success = atm.compare_exchange_strong(result, (T)gid); if (success) { diff --git a/sycl/test/atomic_ref/exchange.cpp b/sycl/test/atomic_ref/exchange.cpp index 18d4014566caa..fc5d800bca3d6 100644 --- a/sycl/test/atomic_ref/exchange.cpp +++ b/sycl/test/atomic_ref/exchange.cpp @@ -29,8 +29,7 @@ template void exchange_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { size_t gid = it.get_id(0); - auto atm = atomic_ref(exc[0]); out[gid] = atm.exchange(T(gid)); }); diff --git a/sycl/test/atomic_ref/load.cpp b/sycl/test/atomic_ref/load.cpp index b27f7a8d20880..adfd82ffb9357 100644 --- a/sycl/test/atomic_ref/load.cpp +++ b/sycl/test/atomic_ref/load.cpp @@ -28,8 +28,7 @@ template void load_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { size_t gid = it.get_id(0); - auto atm = atomic_ref(ld[0]); out[gid] = atm.load(); }); diff --git a/sycl/test/atomic_ref/max.cpp b/sycl/test/atomic_ref/max.cpp index 112c063d163b2..8542cddd227bb 100644 --- a/sycl/test/atomic_ref/max.cpp +++ b/sycl/test/atomic_ref/max.cpp @@ -29,8 +29,7 @@ template void max_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); // +1 accounts for lowest() returning 0 for unsigned types diff --git a/sycl/test/atomic_ref/min.cpp b/sycl/test/atomic_ref/min.cpp index f085d730103be..a1e2342ef84af 100644 --- a/sycl/test/atomic_ref/min.cpp +++ b/sycl/test/atomic_ref/min.cpp @@ -29,8 +29,7 @@ template void min_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); out[gid] = atm.fetch_min(T(gid)); }); diff --git a/sycl/test/atomic_ref/store.cpp b/sycl/test/atomic_ref/store.cpp index 1223eb33a0d65..a0ad18764da2a 100644 --- a/sycl/test/atomic_ref/store.cpp +++ b/sycl/test/atomic_ref/store.cpp @@ -22,8 +22,7 @@ template void store_test(queue q, size_t N) { auto st = store_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { size_t gid = it.get_id(0); - auto atm = atomic_ref(st[0]); atm.store(T(gid)); }); diff --git a/sycl/test/atomic_ref/sub.cpp b/sycl/test/atomic_ref/sub.cpp index a69a31f8fe38f..9947499d70969 100644 --- a/sycl/test/atomic_ref/sub.cpp +++ b/sycl/test/atomic_ref/sub.cpp @@ -29,8 +29,7 @@ void sub_fetch_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); out[gid] = atm.fetch_sub(Difference(1)); }); @@ -65,8 +64,7 @@ void sub_plus_equal_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); out[gid] = atm -= Difference(1); }); @@ -101,8 +99,7 @@ void sub_pre_dec_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); out[gid] = --atm; }); @@ -137,8 +134,7 @@ void sub_post_dec_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); out[gid] = atm--; });