diff --git a/SYCL/SubGroup/helper.hpp b/SYCL/SubGroup/helper.hpp index 964fb742bc..712537db3f 100644 --- a/SYCL/SubGroup/helper.hpp +++ b/SYCL/SubGroup/helper.hpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// #include +#include #include #include #include @@ -36,6 +37,17 @@ template struct utils { std::to_string((T2)v.s1()) + " )"; } }; +template struct utils { + static T2 add_vec(const vec &v) { return v.s0() + v.s1() + v.s2(); } + static bool cmp_vec(const vec &v, const vec &r) { + return v.s0() == r.s0() && v.s1() == r.s1() && v.s2() == r.s2(); + } + static std::string stringify_vec(const vec &v) { + return std::string("(") + std::to_string((T2)v.s0()) + ", " + + std::to_string((T2)v.s1()) + ", " + std::to_string((T2)v.s3()) + + " )"; + } +}; template struct utils { static T2 add_vec(const vec &v) { return v.s0() + v.s1() + v.s2() + v.s3(); @@ -98,9 +110,12 @@ template struct utils { template void exit_if_not_equal(T val, T ref, const char *name) { if (std::is_floating_point::value) { - if (std::fabs(val - ref) > 0.01) { - std::cout << "Unexpected result for " << name << ": " << (double)val - << " expected value: " << (double)ref << std::endl; + auto cmp_val = std::bitset(val); + auto cmp_ref = std::bitset(ref); + if (cmp_val != cmp_ref) { + std::cout << "Unexpected result for " << name << ": " << val << "(" + << cmp_val << ") expected value: " << ref << "(" << cmp_ref + << ")" << std::endl; exit(1); } } else { @@ -115,12 +130,9 @@ template void exit_if_not_equal(T val, T ref, const char *name) { template void exit_if_not_equal(std::complex val, std::complex ref, const char *name) { - if (std::fabs(val.real() - ref.real()) > 0.01 || - std::fabs(val.imag() - ref.imag()) > 0.01) { - std::cout << "Unexpected result for " << name << ": " << val - << " expected value: " << ref << std::endl; - exit(1); - } + std::string Name{name}; + exit_if_not_equal(val.real(), ref.real(), (Name + ".real()").c_str()); + exit_if_not_equal(val.imag(), ref.imag(), (Name + ".imag()").c_str()); } template void exit_if_not_equal(T *val, T *ref, const char *name) { diff --git a/SYCL/SubGroup/load_store.cpp b/SYCL/SubGroup/load_store.cpp index ffd9bb21c7..f3409ef00c 100644 --- a/SYCL/SubGroup/load_store.cpp +++ b/SYCL/SubGroup/load_store.cpp @@ -1,10 +1,8 @@ -// UNSUPPORTED: cpu -// #2252 Disable until all variants of built-ins are available in OpenCL CPU -// runtime for every supported ISA -// // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %HOST_RUN_PLACEHOLDER %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out +// #2252 Disable until all variants of built-ins are available in OpenCL CPU +// runtime for every supported ISA +// RUNx %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out // @@ -23,15 +21,10 @@ template class sycl_subgr; using namespace cl::sycl; template void check(queue &Queue) { - const int G = 1024, L = 128; + const int G = 512, L = 256; - // Pad arrays based on sub-group size to ensure no out-of-bounds accesses - // Workaround for info::device::sub_group_sizes support on some devices - size_t max_sg_size = 128; -#if 0 auto sg_sizes = Queue.get_device().get_info(); size_t max_sg_size = *std::max_element(sg_sizes.begin(), sg_sizes.end()); -#endif try { nd_range<1> NdRange(G, L); @@ -41,7 +34,7 @@ template void check(queue &Queue) { auto acc = syclbuf.template get_access(); for (int i = 0; i < G; i++) { acc[i] = i; - acc[i] += 0.1; // Check that floating point types are not casted to int + acc[i] += 0.25; // Check that floating point types are not casted to int } } Queue.submit([&](handler &cgh) { @@ -51,22 +44,24 @@ template void check(queue &Queue) { {L + max_sg_size * N}, cgh); cgh.parallel_for>(NdRange, [=](nd_item<1> NdItem) { ONEAPI::sub_group SG = NdItem.get_sub_group(); - if (SG.get_group_id().get(0) % N == 0) { - size_t SGOffset = - SG.get_group_id().get(0) * SG.get_max_local_range().get(0); + auto SGid = SG.get_group_id().get(0); + auto SGsize = SG.get_max_local_range().get(0); + /* Avoid overlapping data ranges inside and between local groups */ + if (SGid % N == 0 && (SGid + N) * SGsize <= L) { + size_t SGOffset = SGid * SGsize; size_t WGSGoffset = NdItem.get_group(0) * L + SGOffset; multi_ptr mp( &acc[WGSGoffset]); multi_ptr MPL( &LocalMem[SGOffset]); // Add all values in read block - vec v(utils::add_vec(SG.load(mp))); + vec v(SG.load(mp)); SG.store(MPL, v); vec t(utils::add_vec(SG.load(MPL))); SG.store(mp, t); } if (NdItem.get_global_id(0) == 0) - sgsizeacc[0] = SG.get_max_local_range()[0]; + sgsizeacc[0] = SGsize; }); }); auto acc = syclbuf.template get_access(); @@ -86,12 +81,11 @@ template void check(queue &Queue) { ref = acc[j - (SGid % N) * sg_size]; } else { for (int i = 0; i < N; i++) { - ref += (T)(j + i * sg_size) + 0.1; + ref += (T)(j + i * sg_size) + 0.25; } - ref *= N; } /* There is no defined out-of-range behavior for these functions. */ - if ((SGid + N) * sg_size < L) { + if ((SGid + N) * sg_size <= L) { std::string s("Vector<"); s += std::string(typeid(ref).name()) + std::string(",") + std::to_string(N) + std::string(">[") + std::to_string(j) + @@ -181,20 +175,26 @@ int main() { check(Queue); check(Queue); check(Queue); + check(Queue); check(Queue); check(Queue); + check(Queue); typedef unsigned int aligned_uint __attribute__((aligned(16))); check(Queue); check(Queue); check(Queue); + check(Queue); check(Queue); check(Queue); + check(Queue); typedef float aligned_float __attribute__((aligned(16))); check(Queue); check(Queue); check(Queue); + check(Queue); check(Queue); check(Queue); + check(Queue); } if (Queue.get_device().has_extension("cl_intel_subgroups_short") || PlatformName.find("CUDA") != std::string::npos) { @@ -202,16 +202,20 @@ int main() { check(Queue); check(Queue); check(Queue); + check(Queue); check(Queue); check(Queue); + check(Queue); if (Queue.get_device().has_extension("cl_khr_fp16") || PlatformName.find("CUDA") != std::string::npos) { typedef half aligned_half __attribute__((aligned(16))); check(Queue); check(Queue); check(Queue); + check(Queue); check(Queue); check(Queue); + check(Queue); } } if (Queue.get_device().has_extension("cl_intel_subgroups_long") || @@ -220,20 +224,26 @@ int main() { check(Queue); check(Queue); check(Queue); + check(Queue); check(Queue); check(Queue); + check(Queue); typedef unsigned long aligned_ulong __attribute__((aligned(16))); check(Queue); check(Queue); check(Queue); + check(Queue); check(Queue); check(Queue); + check(Queue); typedef double aligned_double __attribute__((aligned(16))); check(Queue); check(Queue); check(Queue); + check(Queue); check(Queue); check(Queue); + check(Queue); } std::cout << "Test passed." << std::endl; return 0;