diff --git a/SYCL/AtomicRef/add-native.cpp b/SYCL/AtomicRef/add-native.cpp deleted file mode 100644 index dcd5329e86..0000000000 --- a/SYCL/AtomicRef/add-native.cpp +++ /dev/null @@ -1,198 +0,0 @@ -// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -DSYCL_USE_NATIVE_FP_ATOMICS \ -// RUN: -fsycl-targets=%sycl_triple %s -o %t.out \ -// RUN: -Xsycl-target-backend=nvptx64-nvidia-cuda-sycldevice --cuda-gpu-arch=sm_60 -// RUN: %HOST_RUN_PLACEHOLDER %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// TODO: Remove items from UNSUPPORTED once corresponding backends support -// "native" implementation -// UNSUPPORTED: cpu - -#include -#include -#include -#include -#include -#include -using namespace sycl; -using namespace sycl::ONEAPI; - -template -void add_fetch_test(queue q, size_t N) { - T sum = 0; - std::vector output(N); - std::fill(output.begin(), output.end(), T(0)); - { - buffer sum_buf(&sum, 1); - buffer output_buf(output.data(), output.size()); - - q.submit([&](handler &cgh) { - auto sum = sum_buf.template get_access(cgh); - auto out = - output_buf.template get_access(cgh); - cgh.parallel_for(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); - out[gid] = atm.fetch_add(Difference(1)); - }); - }); - } - - // All work-items increment by 1, so final value should be equal to N - assert(sum == T(N)); - - // Fetch returns original value: will be in [0, N-1] - auto min_e = std::min_element(output.begin(), output.end()); - auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == T(0) && *max_e == T(N - 1)); - - // Intermediate values should be unique - std::sort(output.begin(), output.end()); - assert(std::unique(output.begin(), output.end()) == output.end()); -} - -template -void add_plus_equal_test(queue q, size_t N) { - T sum = 0; - std::vector output(N); - std::fill(output.begin(), output.end(), T(0)); - { - buffer sum_buf(&sum, 1); - buffer output_buf(output.data(), output.size()); - - q.submit([&](handler &cgh) { - auto sum = sum_buf.template get_access(cgh); - auto out = - output_buf.template get_access(cgh); - cgh.parallel_for(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); - out[gid] = atm += Difference(1); - }); - }); - } - - // All work-items increment by 1, so final value should be equal to N - assert(sum == T(N)); - - // += returns updated value: will be in [1, N] - auto min_e = std::min_element(output.begin(), output.end()); - auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == T(1) && *max_e == T(N)); - - // Intermediate values should be unique - std::sort(output.begin(), output.end()); - assert(std::unique(output.begin(), output.end()) == output.end()); -} - -template -void add_pre_inc_test(queue q, size_t N) { - T sum = 0; - std::vector output(N); - std::fill(output.begin(), output.end(), T(0)); - { - buffer sum_buf(&sum, 1); - buffer output_buf(output.data(), output.size()); - - q.submit([&](handler &cgh) { - auto sum = sum_buf.template get_access(cgh); - auto out = - output_buf.template get_access(cgh); - cgh.parallel_for(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); - out[gid] = ++atm; - }); - }); - } - - // All work-items increment by 1, so final value should be equal to N - assert(sum == T(N)); - - // Pre-increment returns updated value: will be in [1, N] - auto min_e = std::min_element(output.begin(), output.end()); - auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == T(1) && *max_e == T(N)); - - // Intermediate values should be unique - std::sort(output.begin(), output.end()); - assert(std::unique(output.begin(), output.end()) == output.end()); -} - -template -void add_post_inc_test(queue q, size_t N) { - T sum = 0; - std::vector output(N); - std::fill(output.begin(), output.end(), T(0)); - { - buffer sum_buf(&sum, 1); - buffer output_buf(output.data(), output.size()); - - q.submit([&](handler &cgh) { - auto sum = sum_buf.template get_access(cgh); - auto out = - output_buf.template get_access(cgh); - cgh.parallel_for(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); - auto atm = atomic_ref(sum[0]); - out[gid] = atm++; - }); - }); - } - - // All work-items increment by 1, so final value should be equal to N - assert(sum == T(N)); - - // Post-increment returns original value: will be in [0, N-1] - auto min_e = std::min_element(output.begin(), output.end()); - auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == T(0) && *max_e == T(N - 1)); - - // Intermediate values should be unique - std::sort(output.begin(), output.end()); - assert(std::unique(output.begin(), output.end()) == output.end()); -} - -template -void add_test(queue q, size_t N) { - add_fetch_test(q, N); - add_plus_equal_test(q, N); - add_pre_inc_test(q, N); - add_post_inc_test(q, N); -} - -// Floating-point types do not support pre- or post-increment -template <> void add_test(queue q, size_t N) { - add_fetch_test(q, N); - add_plus_equal_test(q, N); -} -template <> void add_test(queue q, size_t N) { - add_fetch_test(q, N); - add_plus_equal_test(q, N); -} - -int main() { - queue q; - std::string version = q.get_device().get_info(); - - constexpr int N = 32; - add_test(q, N); - add_test(q, N); - add_test(q, N); - add_test(q, N); - add_test(q, N); - add_test(q, N); - add_test(q, N); - add_test(q, N); - add_test(q, N); - - std::cout << "Test passed." << std::endl; -} diff --git a/SYCL/AtomicRef/add-emulated.cpp b/SYCL/AtomicRef/add.cpp similarity index 98% rename from SYCL/AtomicRef/add-emulated.cpp rename to SYCL/AtomicRef/add.cpp index c76b7ef99d..a947a0d450 100644 --- a/SYCL/AtomicRef/add-emulated.cpp +++ b/SYCL/AtomicRef/add.cpp @@ -1,4 +1,5 @@ -// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out \ +// RUN: -Xsycl-target-backend=nvptx64-nvidia-cuda-sycldevice --cuda-gpu-arch=sm_60 // RUN: %HOST_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out diff --git a/SYCL/AtomicRef/max-native.cpp b/SYCL/AtomicRef/max-native.cpp deleted file mode 100644 index 42f3f83404..0000000000 --- a/SYCL/AtomicRef/max-native.cpp +++ /dev/null @@ -1,72 +0,0 @@ -// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -DSYCL_USE_NATIVE_FP_ATOMICS \ -// RUN: -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: %HOST_RUN_PLACEHOLDER %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// TODO: Remove items from UNSUPPORTED once corresponding backends support -// "native" implementation -// UNSUPPORTED: cpu, cuda - -#include -#include -#include -#include -#include -#include -using namespace sycl; -using namespace sycl::ONEAPI; - -template void max_test(queue q, size_t N) { - T initial = std::numeric_limits::lowest(); - T val = initial; - std::vector output(N); - std::fill(output.begin(), output.end(), std::numeric_limits::max()); - { - buffer val_buf(&val, 1); - buffer output_buf(output.data(), output.size()); - - q.submit([&](handler &cgh) { - auto val = val_buf.template get_access(cgh); - auto out = - output_buf.template get_access(cgh); - cgh.parallel_for(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); - - // +1 accounts for lowest() returning 0 for unsigned types - out[gid] = atm.fetch_max(T(gid) + 1); - }); - }); - } - - // Final value should be equal to N - assert(val == N); - - // Only one work-item should have received the initial value - assert(std::count(output.begin(), output.end(), initial) == 1); - - // fetch_max returns original value - // Intermediate values should all be >= initial value - for (int i = 0; i < N; ++i) { - assert(output[i] >= initial); - } -} - -int main() { - queue q; - std::string version = q.get_device().get_info(); - - constexpr int N = 32; - max_test(q, N); - max_test(q, N); - max_test(q, N); - max_test(q, N); - max_test(q, N); - max_test(q, N); - max_test(q, N); - max_test(q, N); - - std::cout << "Test passed." << std::endl; -} diff --git a/SYCL/AtomicRef/max-emulated.cpp b/SYCL/AtomicRef/max.cpp similarity index 100% rename from SYCL/AtomicRef/max-emulated.cpp rename to SYCL/AtomicRef/max.cpp index 44e100387c..db7735e68d 100644 --- a/SYCL/AtomicRef/max-emulated.cpp +++ b/SYCL/AtomicRef/max.cpp @@ -1,7 +1,7 @@ // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %HOST_RUN_PLACEHOLDER %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out #include #include diff --git a/SYCL/AtomicRef/min-native.cpp b/SYCL/AtomicRef/min-native.cpp deleted file mode 100644 index eac73e7332..0000000000 --- a/SYCL/AtomicRef/min-native.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -DSYCL_USE_NATIVE_FP_ATOMICS \ -// RUN: -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: %HOST_RUN_PLACEHOLDER %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// TODO: Remove items from UNSUPPORTED once corresponding backends support -// "native" implementation -// UNSUPPORTED: cpu, cuda - -#include -#include -#include -#include -#include -#include -using namespace sycl; -using namespace sycl::ONEAPI; - -template void min_test(queue q, size_t N) { - T initial = std::numeric_limits::max(); - T val = initial; - std::vector output(N); - std::fill(output.begin(), output.end(), 0); - { - buffer val_buf(&val, 1); - buffer output_buf(output.data(), output.size()); - - q.submit([&](handler &cgh) { - auto val = val_buf.template get_access(cgh); - auto out = - output_buf.template get_access(cgh); - cgh.parallel_for(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); - out[gid] = atm.fetch_min(T(gid)); - }); - }); - } - - // Final value should be equal to 0 - assert(val == 0); - - // Only one work-item should have received the initial value - assert(std::count(output.begin(), output.end(), initial) == 1); - - // fetch_min returns original value - // Intermediate values should all be <= initial value - for (int i = 0; i < N; ++i) { - assert(output[i] <= initial); - } -} - -int main() { - queue q; - std::string version = q.get_device().get_info(); - - constexpr int N = 32; - min_test(q, N); - min_test(q, N); - min_test(q, N); - min_test(q, N); - min_test(q, N); - min_test(q, N); - min_test(q, N); - min_test(q, N); - - std::cout << "Test passed." << std::endl; -} diff --git a/SYCL/AtomicRef/min-emulated.cpp b/SYCL/AtomicRef/min.cpp similarity index 100% rename from SYCL/AtomicRef/min-emulated.cpp rename to SYCL/AtomicRef/min.cpp index ca94aa5a7f..324fdcca9f 100644 --- a/SYCL/AtomicRef/min-emulated.cpp +++ b/SYCL/AtomicRef/min.cpp @@ -1,7 +1,7 @@ // RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %HOST_RUN_PLACEHOLDER %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out #include #include diff --git a/SYCL/AtomicRef/sub-native.cpp b/SYCL/AtomicRef/sub-native.cpp deleted file mode 100644 index 5a8c709517..0000000000 --- a/SYCL/AtomicRef/sub-native.cpp +++ /dev/null @@ -1,198 +0,0 @@ -// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -DSYCL_USE_NATIVE_FP_ATOMICS \ -// RUN: -fsycl-targets=%sycl_triple %s -o %t.out \ -// RUN: -Xsycl-target-backend=nvptx64-nvidia-cuda-sycldevice --cuda-gpu-arch=sm_60 -// RUN: %HOST_RUN_PLACEHOLDER %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// TODO: Remove items from UNSUPPORTED once corresponding backends support -// "native" implementation -// UNSUPPORTED: cpu - -#include -#include -#include -#include -#include -#include -using namespace sycl; -using namespace sycl::ONEAPI; - -template -void sub_fetch_test(queue q, size_t N) { - T val = T(N); - std::vector output(N); - std::fill(output.begin(), output.end(), T(0)); - { - buffer val_buf(&val, 1); - buffer output_buf(output.data(), output.size()); - - q.submit([&](handler &cgh) { - auto val = val_buf.template get_access(cgh); - auto out = - output_buf.template get_access(cgh); - cgh.parallel_for(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); - out[gid] = atm.fetch_sub(Difference(1)); - }); - }); - } - - // All work-items decrement by 1, so final value should be equal to 0 - assert(val == T(0)); - - // Fetch returns original value: will be in [1, N] - auto min_e = std::min_element(output.begin(), output.end()); - auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == T(1) && *max_e == T(N)); - - // Intermediate values should be unique - std::sort(output.begin(), output.end()); - assert(std::unique(output.begin(), output.end()) == output.end()); -} - -template -void sub_plus_equal_test(queue q, size_t N) { - T val = T(N); - std::vector output(N); - std::fill(output.begin(), output.end(), T(0)); - { - buffer val_buf(&val, 1); - buffer output_buf(output.data(), output.size()); - - q.submit([&](handler &cgh) { - auto val = val_buf.template get_access(cgh); - auto out = - output_buf.template get_access(cgh); - cgh.parallel_for(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); - out[gid] = atm -= Difference(1); - }); - }); - } - - // All work-items decrement by 1, so final value should be equal to 0 - assert(val == T(0)); - - // -= returns updated value: will be in [0, N-1] - auto min_e = std::min_element(output.begin(), output.end()); - auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == T(0) && *max_e == T(N - 1)); - - // Intermediate values should be unique - std::sort(output.begin(), output.end()); - assert(std::unique(output.begin(), output.end()) == output.end()); -} - -template -void sub_pre_dec_test(queue q, size_t N) { - T val = T(N); - std::vector output(N); - std::fill(output.begin(), output.end(), T(0)); - { - buffer val_buf(&val, 1); - buffer output_buf(output.data(), output.size()); - - q.submit([&](handler &cgh) { - auto val = val_buf.template get_access(cgh); - auto out = - output_buf.template get_access(cgh); - cgh.parallel_for(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); - out[gid] = --atm; - }); - }); - } - - // All work-items decrement by 1, so final value should be equal to 0 - assert(val == T(0)); - - // Pre-decrement returns updated value: will be in [0, N-1] - auto min_e = std::min_element(output.begin(), output.end()); - auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == T(0) && *max_e == T(N - 1)); - - // Intermediate values should be unique - std::sort(output.begin(), output.end()); - assert(std::unique(output.begin(), output.end()) == output.end()); -} - -template -void sub_post_dec_test(queue q, size_t N) { - T val = T(N); - std::vector output(N); - std::fill(output.begin(), output.end(), T(0)); - { - buffer val_buf(&val, 1); - buffer output_buf(output.data(), output.size()); - - q.submit([&](handler &cgh) { - auto val = val_buf.template get_access(cgh); - auto out = - output_buf.template get_access(cgh); - cgh.parallel_for(range<1>(N), [=](item<1> it) { - int gid = it.get_id(0); - auto atm = atomic_ref(val[0]); - out[gid] = atm--; - }); - }); - } - - // All work-items decrement by 1, so final value should be equal to 0 - assert(val == T(0)); - - // Post-decrement returns original value: will be in [1, N] - auto min_e = std::min_element(output.begin(), output.end()); - auto max_e = std::max_element(output.begin(), output.end()); - assert(*min_e == T(1) && *max_e == T(N)); - - // Intermediate values should be unique - std::sort(output.begin(), output.end()); - assert(std::unique(output.begin(), output.end()) == output.end()); -} - -template -void sub_test(queue q, size_t N) { - sub_fetch_test(q, N); - sub_plus_equal_test(q, N); - sub_pre_dec_test(q, N); - sub_post_dec_test(q, N); -} - -// Floating-point types do not support pre- or post-decrement -template <> void sub_test(queue q, size_t N) { - sub_fetch_test(q, N); - sub_plus_equal_test(q, N); -} -template <> void sub_test(queue q, size_t N) { - sub_fetch_test(q, N); - sub_plus_equal_test(q, N); -} - -int main() { - queue q; - std::string version = q.get_device().get_info(); - - constexpr int N = 32; - sub_test(q, N); - sub_test(q, N); - sub_test(q, N); - sub_test(q, N); - sub_test(q, N); - sub_test(q, N); - sub_test(q, N); - sub_test(q, N); - sub_test(q, N); - - std::cout << "Test passed." << std::endl; -} diff --git a/SYCL/AtomicRef/sub-emulated.cpp b/SYCL/AtomicRef/sub.cpp similarity index 98% rename from SYCL/AtomicRef/sub-emulated.cpp rename to SYCL/AtomicRef/sub.cpp index 04e5059f7f..013197f264 100644 --- a/SYCL/AtomicRef/sub-emulated.cpp +++ b/SYCL/AtomicRef/sub.cpp @@ -1,4 +1,5 @@ -// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out \ +// RUN: -Xsycl-target-backend=nvptx64-nvidia-cuda-sycldevice --cuda-gpu-arch=sm_60 // RUN: %HOST_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out