diff --git a/SYCL/AtomicRef/accessor.cpp b/SYCL/AtomicRef/accessor.cpp index 51a64fd3ea..60380d8e53 100644 --- a/SYCL/AtomicRef/accessor.cpp +++ b/SYCL/AtomicRef/accessor.cpp @@ -26,12 +26,13 @@ template void accessor_test(queue q, size_t N) { static_assert( std::is_same>::value, + atomic_accessor>::value, "atomic_accessor type incorrectly deduced"); #endif - auto sum = atomic_accessor(sum_buf, cgh); + auto sum = + atomic_accessor( + sum_buf, cgh); auto out = output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { @@ -39,8 +40,7 @@ template void accessor_test(queue q, size_t N) { static_assert( std::is_same< decltype(sum[0]), - atomic_ref>::value, "atomic_accessor returns incorrect atomic_ref"); out[gid] = sum[0].fetch_add(T(1)); @@ -70,19 +70,18 @@ void local_accessor_test(queue q, size_t N, size_t L = 8) { buffer output_buf(output.data(), output.size()); q.submit([&](handler &cgh) { auto sum = - atomic_accessor( - 1, cgh); + atomic_accessor(1, cgh); auto out = output_buf.template get_access(cgh); cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) { int grp = it.get_group(0); sum[0].store(0); it.barrier(); static_assert( - std::is_same>::value, + std::is_same< + decltype(sum[0]), + atomic_ref>::value, "local atomic_accessor returns incorrect atomic_ref"); T result = sum[0].fetch_add(T(1)); if (result == it.get_local_range(0) - 1) { diff --git a/SYCL/AtomicRef/add.cpp b/SYCL/AtomicRef/add.cpp index a947a0d450..a0f2649bdf 100644 --- a/SYCL/AtomicRef/add.cpp +++ b/SYCL/AtomicRef/add.cpp @@ -28,8 +28,7 @@ void add_fetch_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); out[gid] = atm.fetch_add(Difference(1)); }); @@ -64,8 +63,7 @@ void add_plus_equal_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); out[gid] = atm += Difference(1); }); @@ -100,8 +98,7 @@ void add_pre_inc_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); out[gid] = ++atm; }); @@ -136,8 +133,7 @@ void add_post_inc_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); out[gid] = atm++; }); diff --git a/SYCL/AtomicRef/atomic_memory_order.cpp b/SYCL/AtomicRef/atomic_memory_order.cpp new file mode 100644 index 0000000000..59ca241d58 --- /dev/null +++ b/SYCL/AtomicRef/atomic_memory_order.cpp @@ -0,0 +1,149 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// L0, OpenCL, and ROCm backends don't currently support +// info::device::atomic_memory_order_capabilities and aspect::atomic64 +// XFAIL: level_zero || opencl || rocm + +// NOTE: Tests load and store for supported memory orderings. + +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::ONEAPI; + +template class memory_order_kernel; + +template void acq_rel_test(queue q, size_t N) { + T a = 0; + { + buffer a_buf(&a, 1); + + q.submit([&](handler &cgh) { + auto a_acc = a_buf.template get_access(cgh); + cgh.parallel_for>( + range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto aar = + atomic_ref(a_acc[0]); + auto ld = aar.load(); + ld += 1; + aar.store(ld); + }); + }); + } + + // All work-items increment by 1, so final value should be equal to N + assert(a == T(N)); +} + +template void seq_cst_test(queue q, size_t N) { + T a = 0; + T b = 0; + { + buffer a_buf(&a, 1); + buffer b_buf(&b, 1); + + q.submit([&](handler &cgh) { + auto a_acc = a_buf.template get_access(cgh); + auto b_acc = b_buf.template get_access(cgh); + cgh.parallel_for>( + range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + auto aar = + atomic_ref(a_acc[0]); + auto bar = + atomic_ref(b_acc[0]); + auto ald = aar.load(); + auto bld = bar.load(); + ald += 1; + bld += ald; + bar.store(bld); + aar.store(ald); + }); + }); + } + + // All work-items increment a by 1, so final value should be equal to N + assert(a == T(N)); + // b is the sum of [1..N] + size_t rsum = 0; + for (size_t i = 1; i <= N; ++i) + rsum += i; + assert(b == T(rsum)); +} + +bool is_supported(std::vector capabilities, + memory_order mem_order) { + return std::find(capabilities.begin(), capabilities.end(), mem_order) != + capabilities.end(); +} + +int main() { + queue q; + + std::vector supported_memory_orders = + q.get_device().get_info(); + bool atomic64_support = q.get_device().has(aspect::atomic64); + + constexpr int N = 32; + + // Relaxed memory order must be supported. This ordering is used in other + // tests. + assert(is_supported(supported_memory_orders, memory_order::relaxed)); + + if (is_supported(supported_memory_orders, memory_order::acq_rel)) { + // Acquire-release memory order must also support both acquire and release + // orderings. + assert(is_supported(supported_memory_orders, memory_order::acquire) && + is_supported(supported_memory_orders, memory_order::release)); + acq_rel_test(q, N); + acq_rel_test(q, N); + acq_rel_test(q, N); + if (sizeof(long) == 4) { + // long is 32-bit + acq_rel_test(q, N); + acq_rel_test(q, N); + } + if (atomic64_support) { + if (sizeof(long) == 8) { + // long is 64-bit + acq_rel_test(q, N); + acq_rel_test(q, N); + } + acq_rel_test(q, N); + acq_rel_test(q, N); + acq_rel_test(q, N); + } + } + + if (is_supported(supported_memory_orders, memory_order::seq_cst)) { + seq_cst_test(q, N); + seq_cst_test(q, N); + seq_cst_test(q, N); + if (sizeof(long) == 4) { + // long is 32-bit + seq_cst_test(q, N); + seq_cst_test(q, N); + } + if (atomic64_support) { + if (sizeof(long) == 8) { + // long is 64-bit + seq_cst_test(q, N); + seq_cst_test(q, N); + } + seq_cst_test(q, N); + seq_cst_test(q, N); + seq_cst_test(q, N); + } + } + + std::cout << "Test passed." << std::endl; +} diff --git a/SYCL/AtomicRef/compare_exchange.cpp b/SYCL/AtomicRef/compare_exchange.cpp index 788263baf4..518028dc59 100644 --- a/SYCL/AtomicRef/compare_exchange.cpp +++ b/SYCL/AtomicRef/compare_exchange.cpp @@ -31,9 +31,9 @@ template void compare_exchange_test(queue q, size_t N) { cgh.parallel_for>( range<1>(N), [=](item<1> it) { size_t gid = it.get_id(0); - auto atm = atomic_ref(exc[0]); + auto atm = + atomic_ref(exc[0]); T result = T(N); // Avoid copying pointer bool success = atm.compare_exchange_strong(result, (T)gid); if (success) { diff --git a/SYCL/AtomicRef/exchange.cpp b/SYCL/AtomicRef/exchange.cpp index 181739c113..c3e1584769 100644 --- a/SYCL/AtomicRef/exchange.cpp +++ b/SYCL/AtomicRef/exchange.cpp @@ -29,8 +29,7 @@ template void exchange_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { size_t gid = it.get_id(0); - auto atm = atomic_ref(exc[0]); out[gid] = atm.exchange(T(gid)); }); diff --git a/SYCL/AtomicRef/load.cpp b/SYCL/AtomicRef/load.cpp index f4090afbea..eb6f30f8b8 100644 --- a/SYCL/AtomicRef/load.cpp +++ b/SYCL/AtomicRef/load.cpp @@ -28,8 +28,7 @@ template void load_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { size_t gid = it.get_id(0); - auto atm = atomic_ref(ld[0]); out[gid] = atm.load(); }); diff --git a/SYCL/AtomicRef/max.cpp b/SYCL/AtomicRef/max.cpp index db7735e68d..8f12c1a6ed 100644 --- a/SYCL/AtomicRef/max.cpp +++ b/SYCL/AtomicRef/max.cpp @@ -27,8 +27,7 @@ template void max_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); // +1 accounts for lowest() returning 0 for unsigned types diff --git a/SYCL/AtomicRef/min.cpp b/SYCL/AtomicRef/min.cpp index 324fdcca9f..eaf55783da 100644 --- a/SYCL/AtomicRef/min.cpp +++ b/SYCL/AtomicRef/min.cpp @@ -27,8 +27,7 @@ template void min_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); out[gid] = atm.fetch_min(T(gid)); }); diff --git a/SYCL/AtomicRef/store.cpp b/SYCL/AtomicRef/store.cpp index 57d89aa327..1a55231b17 100644 --- a/SYCL/AtomicRef/store.cpp +++ b/SYCL/AtomicRef/store.cpp @@ -22,8 +22,7 @@ template void store_test(queue q, size_t N) { auto st = store_buf.template get_access(cgh); cgh.parallel_for>(range<1>(N), [=](item<1> it) { size_t gid = it.get_id(0); - auto atm = atomic_ref(st[0]); atm.store(T(gid)); }); diff --git a/SYCL/AtomicRef/sub.cpp b/SYCL/AtomicRef/sub.cpp index 013197f264..d3c105d760 100644 --- a/SYCL/AtomicRef/sub.cpp +++ b/SYCL/AtomicRef/sub.cpp @@ -28,8 +28,7 @@ void sub_fetch_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); out[gid] = atm.fetch_sub(Difference(1)); }); @@ -64,8 +63,7 @@ void sub_plus_equal_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); out[gid] = atm -= Difference(1); }); @@ -100,8 +98,7 @@ void sub_pre_dec_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); out[gid] = --atm; }); @@ -136,8 +133,7 @@ void sub_post_dec_test(queue q, size_t N) { output_buf.template get_access(cgh); cgh.parallel_for(range<1>(N), [=](item<1> it) { int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); out[gid] = atm--; });