From ea7d1e2f3e9e2efe18a4154263d4d367ce2564fa Mon Sep 17 00:00:00 2001 From: Vladimir Lazarev Date: Wed, 15 Sep 2021 13:37:51 +0300 Subject: [PATCH 1/7] [SYCL] Update sub-group mask test The test is updated according spec changes in intel/llvm#4481 --- SYCL/{GroupMask => SubGroupMask}/Basic.cpp | 37 ++++++++++++++-------- 1 file changed, 24 insertions(+), 13 deletions(-) rename SYCL/{GroupMask => SubGroupMask}/Basic.cpp (69%) diff --git a/SYCL/GroupMask/Basic.cpp b/SYCL/SubGroupMask/Basic.cpp similarity index 69% rename from SYCL/GroupMask/Basic.cpp rename to SYCL/SubGroupMask/Basic.cpp index 6fe6956e75..95e8b86954 100644 --- a/SYCL/GroupMask/Basic.cpp +++ b/SYCL/SubGroupMask/Basic.cpp @@ -17,7 +17,7 @@ using namespace sycl; constexpr int global_size = 128; constexpr int local_size = 32; int main() { -#ifdef SYCL_EXT_ONEAPI_GROUP_MASK +#ifdef SYCL_EXT_ONEAPI_SUB_GROUP_MASK queue Queue; try { @@ -29,12 +29,15 @@ int main() { Queue.submit([&](handler &cgh) { auto resacc = resbuf.get_access(cgh); - cgh.parallel_for(NdRange, [=](nd_item<1> NdItem) { + cgh.parallel_for(NdRange, [=](nd_item<1> + NdItem) { size_t GID = NdItem.get_global_linear_id(); auto SG = NdItem.get_sub_group(); + // AAAAAAAA auto gmask_gid2 = ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 2); - auto gmask_gid3 = + // B6DB6DB6 + auto gmask_gid3 = ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 3); NdItem.barrier(); @@ -48,28 +51,36 @@ int main() { ((bool)(i % 2) ^ (bool)(i % 3))) << 3; } - gmask_gid2 <<= 32; - res |= (gmask_gid2.extract_bits()[2] != 0xaaaaaaaa) << 4; - res |= ((gmask_gid2 >> 8).extract_bits()[3] != 0xaa000000) << 5; - res |= ((gmask_gid3 >> 8).extract_bits()[3] != 0xb6db6d) << 6; - res |= (!gmask_gid2[32] && gmask_gid2[31]) << 7; + gmask_gid2 <<= 8; + uint32_t r=0; + gmask_gid2.extract_bits(r); + res |= (r != 0xaaaaaa00) << 4; + (gmask_gid2>>4).extract_bits(r); + res |= (r != 0x0aaaaaa0) << 5; + gmask_gid3.insert_bits((char)0b01010101,8); + res |= (!gmask_gid3[8]||gmask_gid3[9]||!gmask_gid3[10]||gmask_gid3[11]) << 6; + marray mr{1}; + gmask_gid3.extract_bits(mr); + res |= (mr[0]!=0xb6||mr[1]!=0x55||mr[2]!=0xdb||mr[3]!=0xb6||mr[4]||mr[5]) << 7; + res |= (gmask_gid2[30] || !gmask_gid2[31]) << 8; gmask_gid3[0] = gmask_gid3[3] = gmask_gid3[6] = true; - res |= (gmask_gid3.extract_bits()[3] != 0xb6db6dff) << 7; + gmask_gid3.extract_bits(r); + res |= (r != 0xb6db55ff) << 9; gmask_gid3.reset(); res |= !(gmask_gid3.none() && gmask_gid2.any() && !gmask_gid2.all()) - << 8; + << 10; gmask_gid2.set(); res |= !(gmask_gid3.none() && gmask_gid2.any() && gmask_gid2.all()) - << 9; + << 11; gmask_gid3.flip(); - res |= (gmask_gid3 != gmask_gid2) << 10; + res |= (gmask_gid3 != gmask_gid2) << 12; resacc[0] = res; } }); }); } if (Res) { - std::cout << "Unexpected result for group_mask operation: " << Res + std::cout << "Unexpected result for sub_group_mask operation: " << Res << std::endl; exit(1); } From e9c0dd61bd3b36e303be96f3f38ea8d86d6edd1f Mon Sep 17 00:00:00 2001 From: Vladimir Lazarev Date: Wed, 15 Sep 2021 13:42:51 +0300 Subject: [PATCH 2/7] Update CODEOWNERS following renaming --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 28a403e8bd..4c1f6f8d0b 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -35,7 +35,7 @@ SYCL/Functor @AlexeySachkov # Group algorithms SYCL/GroupAlgorithm @Pennycook @AlexeySachkov SYCL/SubGroup @Pennycook @AlexeySachkov -SYCL/GroupMask @Pennycook @vladimilaz +SYCL/SubGroupMask @Pennycook @vladimilaz # Group local memory SYCL/GroupLocalMemory @sergey-semenov @Pennycook From 9477e124e5e774b9894856c774374b63e96ad715 Mon Sep 17 00:00:00 2001 From: Vladimir Lazarev Date: Wed, 15 Sep 2021 13:59:01 +0300 Subject: [PATCH 3/7] fix clang format --- SYCL/SubGroupMask/Basic.cpp | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/SYCL/SubGroupMask/Basic.cpp b/SYCL/SubGroupMask/Basic.cpp index 95e8b86954..570758e203 100644 --- a/SYCL/SubGroupMask/Basic.cpp +++ b/SYCL/SubGroupMask/Basic.cpp @@ -33,11 +33,11 @@ int main() { NdItem) { size_t GID = NdItem.get_global_linear_id(); auto SG = NdItem.get_sub_group(); - // AAAAAAAA + // AAAAAAAA auto gmask_gid2 = ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 2); // B6DB6DB6 - auto gmask_gid3 = + auto gmask_gid3 = ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 3); NdItem.barrier(); @@ -52,19 +52,23 @@ int main() { << 3; } gmask_gid2 <<= 8; - uint32_t r=0; - gmask_gid2.extract_bits(r); + uint32_t r = 0; + gmask_gid2.extract_bits(r); res |= (r != 0xaaaaaa00) << 4; - (gmask_gid2>>4).extract_bits(r); - res |= (r != 0x0aaaaaa0) << 5; - gmask_gid3.insert_bits((char)0b01010101,8); - res |= (!gmask_gid3[8]||gmask_gid3[9]||!gmask_gid3[10]||gmask_gid3[11]) << 6; - marray mr{1}; - gmask_gid3.extract_bits(mr); - res |= (mr[0]!=0xb6||mr[1]!=0x55||mr[2]!=0xdb||mr[3]!=0xb6||mr[4]||mr[5]) << 7; + (gmask_gid2 >> 4).extract_bits(r); + res |= (r != 0x0aaaaaa0) << 5; + gmask_gid3.insert_bits((char)0b01010101, 8); + res |= (!gmask_gid3[8] || gmask_gid3[9] || !gmask_gid3[10] || + gmask_gid3[11]) + << 6; + marray mr{1}; + gmask_gid3.extract_bits(mr); + res |= (mr[0] != 0xb6 || mr[1] != 0x55 || mr[2] != 0xdb || + mr[3] != 0xb6 || mr[4] || mr[5]) + << 7; res |= (gmask_gid2[30] || !gmask_gid2[31]) << 8; gmask_gid3[0] = gmask_gid3[3] = gmask_gid3[6] = true; - gmask_gid3.extract_bits(r); + gmask_gid3.extract_bits(r); res |= (r != 0xb6db55ff) << 9; gmask_gid3.reset(); res |= !(gmask_gid3.none() && gmask_gid2.any() && !gmask_gid2.all()) From 1c3ccf485843708dd216bbb888d0d90052942afa Mon Sep 17 00:00:00 2001 From: Vladimir Lazarev Date: Wed, 15 Sep 2021 19:28:04 +0300 Subject: [PATCH 4/7] Add testing for diffrent SG sizes --- SYCL/SubGroupMask/Basic.cpp | 103 ++++++++++++++++---------------- SYCL/SubGroupMask/GroupSize.cpp | 89 +++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 50 deletions(-) create mode 100644 SYCL/SubGroupMask/GroupSize.cpp diff --git a/SYCL/SubGroupMask/Basic.cpp b/SYCL/SubGroupMask/Basic.cpp index 570758e203..94ff45a882 100644 --- a/SYCL/SubGroupMask/Basic.cpp +++ b/SYCL/SubGroupMask/Basic.cpp @@ -29,58 +29,61 @@ int main() { Queue.submit([&](handler &cgh) { auto resacc = resbuf.get_access(cgh); - cgh.parallel_for(NdRange, [=](nd_item<1> - NdItem) { - size_t GID = NdItem.get_global_linear_id(); - auto SG = NdItem.get_sub_group(); - // AAAAAAAA - auto gmask_gid2 = - ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 2); - // B6DB6DB6 - auto gmask_gid3 = - ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 3); - NdItem.barrier(); + cgh.parallel_for( + NdRange, [=](nd_item<1> NdItem) [[intel::reqd_sub_group_size(32)]] { + size_t GID = NdItem.get_global_linear_id(); + auto SG = NdItem.get_sub_group(); + // AAAAAAAA + auto gmask_gid2 = + ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 2); + // B6DB6DB6 + auto gmask_gid3 = + ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 3); - if (!GID) { - int res = 0; + if (!GID) { + int res = 0; - for (size_t i = 0; i < SG.get_max_local_range()[0]; i++) { - res |= !((gmask_gid2 | gmask_gid3)[i] == (i % 2 || i % 3)) << 1; - res |= !((gmask_gid2 & gmask_gid3)[i] == (i % 2 && i % 3)) << 2; - res |= !((gmask_gid2 ^ gmask_gid3)[i] == - ((bool)(i % 2) ^ (bool)(i % 3))) - << 3; - } - gmask_gid2 <<= 8; - uint32_t r = 0; - gmask_gid2.extract_bits(r); - res |= (r != 0xaaaaaa00) << 4; - (gmask_gid2 >> 4).extract_bits(r); - res |= (r != 0x0aaaaaa0) << 5; - gmask_gid3.insert_bits((char)0b01010101, 8); - res |= (!gmask_gid3[8] || gmask_gid3[9] || !gmask_gid3[10] || - gmask_gid3[11]) - << 6; - marray mr{1}; - gmask_gid3.extract_bits(mr); - res |= (mr[0] != 0xb6 || mr[1] != 0x55 || mr[2] != 0xdb || - mr[3] != 0xb6 || mr[4] || mr[5]) - << 7; - res |= (gmask_gid2[30] || !gmask_gid2[31]) << 8; - gmask_gid3[0] = gmask_gid3[3] = gmask_gid3[6] = true; - gmask_gid3.extract_bits(r); - res |= (r != 0xb6db55ff) << 9; - gmask_gid3.reset(); - res |= !(gmask_gid3.none() && gmask_gid2.any() && !gmask_gid2.all()) - << 10; - gmask_gid2.set(); - res |= !(gmask_gid3.none() && gmask_gid2.any() && gmask_gid2.all()) - << 11; - gmask_gid3.flip(); - res |= (gmask_gid3 != gmask_gid2) << 12; - resacc[0] = res; - } - }); + for (size_t i = 0; i < SG.get_max_local_range()[0]; i++) { + res |= !((gmask_gid2 | gmask_gid3)[i] == (i % 2 || i % 3)) + << 1; + res |= !((gmask_gid2 & gmask_gid3)[i] == (i % 2 && i % 3)) + << 2; + res |= !((gmask_gid2 ^ gmask_gid3)[i] == + ((bool)(i % 2) ^ (bool)(i % 3))) + << 3; + } + gmask_gid2 <<= 8; + uint32_t r = 0; + gmask_gid2.extract_bits(r); + res |= (r != 0xaaaaaa00) << 4; + (gmask_gid2 >> 4).extract_bits(r); + res |= (r != 0x0aaaaaa0) << 5; + gmask_gid3.insert_bits((char)0b01010101, 8); + res |= (!gmask_gid3[8] || gmask_gid3[9] || !gmask_gid3[10] || + gmask_gid3[11]) + << 6; + marray mr{1}; + gmask_gid3.extract_bits(mr); + res |= (mr[0] != 0xb6 || mr[1] != 0x55 || mr[2] != 0xdb || + mr[3] != 0xb6 || mr[4] || mr[5]) + << 7; + res |= (gmask_gid2[30] || !gmask_gid2[31]) << 8; + gmask_gid3[0] = gmask_gid3[3] = gmask_gid3[6] = true; + gmask_gid3.extract_bits(r); + res |= (r != 0xb6db55ff) << 9; + gmask_gid3.reset(); + res |= !(gmask_gid3.none() && gmask_gid2.any() && + !gmask_gid2.all()) + << 10; + gmask_gid2.set(); + res |= + !(gmask_gid3.none() && gmask_gid2.any() && gmask_gid2.all()) + << 11; + gmask_gid3.flip(); + res |= (gmask_gid3 != gmask_gid2) << 12; + resacc[0] = res; + } + }); }); } if (Res) { diff --git a/SYCL/SubGroupMask/GroupSize.cpp b/SYCL/SubGroupMask/GroupSize.cpp new file mode 100644 index 0000000000..802db3493f --- /dev/null +++ b/SYCL/SubGroupMask/GroupSize.cpp @@ -0,0 +1,89 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// REQUIRES: gpu +// UNSUPPORTED: cuda, hip +// GroupNonUniformBallot capability is supported on Intel GPU only +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +//==---------- Basic.cpp - SYCL Group Mask basic test ----------*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +using namespace sycl; +constexpr int global_size = 128; +constexpr int local_size = 32; +template class sycl_subgr; + +template void test(queue Queue) { + std::cout << "Testing sub_group_mask for sub_group size=" << SGSize + << std::endl; + try { + nd_range<1> NdRange(global_size, local_size); + int Res[32 / SGSize] = {0}; + { + buffer resbuf(Res, range<1>(32 / SGSize)); + + Queue.submit([&](handler &cgh) { + auto resacc = resbuf.template get_access(cgh); + + cgh.parallel_for>( + NdRange, [= + ](nd_item<1> NdItem) [[intel::reqd_sub_group_size(SGSize)]] { + auto SG = NdItem.get_sub_group(); + auto LID = SG.get_local_id(); + auto SGID = SG.get_group_id(); + + auto gmask_gid2 = + ext::oneapi::group_ballot(NdItem.get_sub_group(), LID % 2); + auto gmask_gid3 = + ext::oneapi::group_ballot(NdItem.get_sub_group(), LID % 3); + + if (!LID) { + int res = 0; + + for (size_t i = 0; i < SG.get_max_local_range()[0]; i++) { + res |= !((gmask_gid2 | gmask_gid3)[i] == (i % 2 || i % 3)) + << 1; + res |= !((gmask_gid2 & gmask_gid3)[i] == (i % 2 && i % 3)) + << 2; + res |= !((gmask_gid2 ^ gmask_gid3)[i] == + ((bool)(i % 2) ^ (bool)(i % 3))) + << 3; + } + res |= (gmask_gid2.size() != SG.get_max_local_range()[1]) << 4; + resacc[SGID] = res; + } + }); + }); + } + for (size_t i = 0; i < 32 / SGSize; i++) { + if (Res[i]) { + std::cout + << "Unexpected result for sub_group_mask operation for sub group " + << i << ": " << Res[i] << std::endl; + exit(1); + } + } + } catch (exception e) { + std::cout << "SYCL exception caught: " << e.what(); + exit(1); + } +} +int main() { +#ifdef SYCL_EXT_ONEAPI_SUB_GROUP_MASK + queue Queue; + + test<8>(Queue); + test<16>(Queue); + test<32>(Queue); + + std::cout << "Test passed." << std::endl; +#else + std::cout << "Test skipped due to missing extension." << std::endl; +#endif + return 0; +} From 23f621ab459cba5c7227b804d0586edeb26300ad Mon Sep 17 00:00:00 2001 From: Vladimir Lazarev Date: Wed, 15 Sep 2021 20:06:51 +0300 Subject: [PATCH 5/7] fix misprint --- SYCL/SubGroupMask/GroupSize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SYCL/SubGroupMask/GroupSize.cpp b/SYCL/SubGroupMask/GroupSize.cpp index 802db3493f..73069667ea 100644 --- a/SYCL/SubGroupMask/GroupSize.cpp +++ b/SYCL/SubGroupMask/GroupSize.cpp @@ -54,7 +54,7 @@ template void test(queue Queue) { ((bool)(i % 2) ^ (bool)(i % 3))) << 3; } - res |= (gmask_gid2.size() != SG.get_max_local_range()[1]) << 4; + res |= (gmask_gid2.size() != SG.get_max_local_range()[0]) << 4; resacc[SGID] = res; } }); From 5b225163f5b966b7a183ed27e1699a7d0a95c0e5 Mon Sep 17 00:00:00 2001 From: Vladimir Lazarev Date: Wed, 15 Sep 2021 21:20:35 +0300 Subject: [PATCH 6/7] Fix failure when extension is not supported --- SYCL/SubGroupMask/GroupSize.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/SYCL/SubGroupMask/GroupSize.cpp b/SYCL/SubGroupMask/GroupSize.cpp index 73069667ea..c7c58d22e2 100644 --- a/SYCL/SubGroupMask/GroupSize.cpp +++ b/SYCL/SubGroupMask/GroupSize.cpp @@ -14,6 +14,7 @@ #include using namespace sycl; +#ifdef SYCL_EXT_ONEAPI_SUB_GROUP_MASK constexpr int global_size = 128; constexpr int local_size = 32; template class sycl_subgr; @@ -73,6 +74,8 @@ template void test(queue Queue) { exit(1); } } +#endif // SYCL_EXT_ONEAPI_SUB_GROUP_MASK + int main() { #ifdef SYCL_EXT_ONEAPI_SUB_GROUP_MASK queue Queue; From 0ef00f73f956b09d3e54de5022ee74777398e6d1 Mon Sep 17 00:00:00 2001 From: Vladimir Lazarev Date: Thu, 16 Sep 2021 09:03:16 +0300 Subject: [PATCH 7/7] Fix wording in comments --- SYCL/SubGroupMask/Basic.cpp | 2 +- SYCL/SubGroupMask/GroupSize.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/SYCL/SubGroupMask/Basic.cpp b/SYCL/SubGroupMask/Basic.cpp index 94ff45a882..cffdd7ee25 100644 --- a/SYCL/SubGroupMask/Basic.cpp +++ b/SYCL/SubGroupMask/Basic.cpp @@ -4,7 +4,7 @@ // GroupNonUniformBallot capability is supported on Intel GPU only // RUN: %GPU_RUN_PLACEHOLDER %t.out -//==---------- Basic.cpp - SYCL Group Mask basic test ----------*- C++ -*---==// +//==---------- Basic.cpp - sub-group mask basic test -----------*- C++ -*---==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/SYCL/SubGroupMask/GroupSize.cpp b/SYCL/SubGroupMask/GroupSize.cpp index c7c58d22e2..3f70fe58cb 100644 --- a/SYCL/SubGroupMask/GroupSize.cpp +++ b/SYCL/SubGroupMask/GroupSize.cpp @@ -4,7 +4,7 @@ // GroupNonUniformBallot capability is supported on Intel GPU only // RUN: %GPU_RUN_PLACEHOLDER %t.out -//==---------- Basic.cpp - SYCL Group Mask basic test ----------*- C++ -*---==// +//==- GroupSize.cpp - sub-group mask dependency on group size --*- C++ -*---==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -20,7 +20,7 @@ constexpr int local_size = 32; template class sycl_subgr; template void test(queue Queue) { - std::cout << "Testing sub_group_mask for sub_group size=" << SGSize + std::cout << "Testing sub_group_mask for sub-group size=" << SGSize << std::endl; try { nd_range<1> NdRange(global_size, local_size); @@ -64,7 +64,7 @@ template void test(queue Queue) { for (size_t i = 0; i < 32 / SGSize; i++) { if (Res[i]) { std::cout - << "Unexpected result for sub_group_mask operation for sub group " + << "Unexpected result for sub_group_mask operation for sub-group " << i << ": " << Res[i] << std::endl; exit(1); }