From 1cbe971449060230b01dade6ab11768c7377b7fb Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 3 Jul 2024 09:51:39 +0100 Subject: [PATCH 01/17] [SYCL] Add max work-group size kernel properties This patch adds two kernel properties to allow users to specify the maximum work-group size that a kernel will be invoked with. The `max_work_group_size` property corresponds to the `intel::max_work_group_size` function attribute, but can be specified with 1, 2, or 3 dimensions (unlike the attribute which accepts only 3). The `max_total_work_group_size` property is similar but is always a single value which denotes the combined total work-group size. This can be used when the user cannot guarantee a maximum bound in each of the dimensions they wish to run the kernel, but can guarantee a total. This acts similarly to CUDA's `maxThreadsPerBlock` launch bounds property. This patch also wires up the 'max_work_group_size' property to the equivalent SPIR-V execution mode, which should hopefully improve certain use cases. --- clang/lib/CodeGen/Targets/NVPTX.cpp | 50 +++++++++++++++++ .../SYCLLowerIR/CompileTimePropertiesPass.cpp | 29 +++++----- ...sycl_ext_oneapi_kernel_properties.asciidoc | 40 +++++++++++++- .../oneapi/kernel_properties/properties.hpp | 55 +++++++++++++++++++ .../sycl/ext/oneapi/properties/property.hpp | 4 +- .../properties_kernel_launch_bounds.cpp | 20 +++++++ .../properties_kernel_launch_bounds_nvptx.cpp | 24 ++++++++ .../properties_kernel_max_work_group_size.cpp | 37 +++++++++++++ ...rties_kernel_max_work_group_size_nvptx.cpp | 51 +++++++++++++++++ .../properties/properties_kernel.cpp | 22 ++++++++ 10 files changed, 317 insertions(+), 15 deletions(-) create mode 100644 sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp create mode 100644 sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp create mode 100644 sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp create mode 100644 sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 0bfbdb0d6e66a..5ce4e0ae63ff6 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -244,6 +244,29 @@ static bool supportsGridConstant(CudaArch Arch) { return Arch >= CudaArch::SM_70; } +static llvm::SmallVector, 3> +decomposeSYCLWGAttr(const llvm::Attribute &Attr) { + // Split up values in the comma-separated list of integers. + SmallVector ValStrs; + Attr.getValueAsString().split(ValStrs, ','); + assert(ValStrs.size() <= 3 && "Must have at most three dimensions for " + "SYCL work-group property"); + + llvm::SmallVector, 3> Ops; + // Index-flip the values; SYCL specifies fastest-moving dimensions + // right-to-left: NVPTX is left-to-right. + for (auto ValStr : reverse(ValStrs)) { + size_t Value = 0; + [[maybe_unused]] bool Error = ValStr.getAsInteger(10, Value); + assert(!Error && "The attribute's value is not a number"); + Ops.push_back(Value); + } + // Pad out any missing elements + Ops.append(3 - std::max(Ops.size(), size_t{3}), std::nullopt); + + return Ops; +} + void NVPTXTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const { if (GV->isDeclaration()) @@ -301,6 +324,33 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( addNVVMMetadata(F, "maxntidx", MWGS->getZDimVal()); addNVVMMetadata(F, "maxntidy", MWGS->getYDimVal()); addNVVMMetadata(F, "maxntidz", MWGS->getXDimVal()); + } else if (auto Attr = F->getFnAttribute("sycl-max-work-group-size"); + Attr.isValid()) { + auto Ops = decomposeSYCLWGAttr(Attr); + + // Work-group sizes (in NVVM annotations) must be positive and less than + // INT32_MAX, whereas SYCL can allow for larger work-group sizes (see + // -fno-sycl-id-queries-fit-in-int). If any dimension is too large for + // NVPTX, don't emit any annotation at all. + if (llvm::all_of(Ops, [](std::optional V) { + return !V || llvm::isUInt<31>(*V); + })) { + static constexpr const char *Annots[] = {"maxntidx", "maxntidy", + "maxntidz"}; + for (auto [AnnotStr, Val] : zip(Annots, Ops)) + if (Val.has_value()) + addNVVMMetadata(F, AnnotStr, *Val); + } + } + + if (auto Attr = F->getFnAttribute("sycl-max-total-work-group-size"); + Attr.isValid()) { + size_t Value = 0; + bool Error = Attr.getValueAsString().getAsInteger(10, Value); + assert(!Error && "The attribute's value is not a number"); + if (llvm::isUInt<31>(Value)) { + addNVVMMetadata(F, "maxntidx", Value); + } } auto attrValue = [&](Expr *E) { diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index 36adf1e52ff56..8f82b9a7088ba 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -361,18 +361,24 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 64); } - if (AttrKindStr == "sycl-work-group-size" || - AttrKindStr == "sycl-work-group-size-hint") { + static constexpr std::tuple SimpleWGAttrs[] = { + {"sycl-work-group-size", "reqd_work_group_size"}, + {"sycl-work-group-size-hint", "work_group_size_hint"}, + {"sycl-max-work-group-size", "max_work_group_size"}, + }; + + for (auto &[AttrKind, MDStr] : SimpleWGAttrs) { + if (AttrKindStr != AttrKind) + continue; // Split values in the comma-separated list integers. - SmallVector ValStrs; - Attr.getValueAsString().split(ValStrs, ','); + SmallVector AttrValStrs; + Attr.getValueAsString().split(AttrValStrs, ','); - assert(ValStrs.size() <= 3 && - "sycl-work-group-size and sycl-work-group-size-hint currently only " - "support up to three values"); + assert(AttrValStrs.size() <= 3 && + "Incorrect number of values for kernel property"); // SYCL work-group sizes must be reversed for SPIR-V. - std::reverse(ValStrs.begin(), ValStrs.end()); + std::reverse(AttrValStrs.begin(), AttrValStrs.end()); // Use integer pointer size as closest analogue to size_t. IntegerType *IntPtrTy = DLayout.getIntPtrType(Ctx); @@ -381,14 +387,11 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { // Get the integers from the strings. SmallVector MDVals; - for (StringRef ValStr : ValStrs) + for (StringRef ValStr : AttrValStrs) MDVals.push_back(ConstantAsMetadata::get( Constant::getIntegerValue(SizeTTy, APInt(SizeTBitSize, ValStr, 10)))); - const char *MDName = (AttrKindStr == "sycl-work-group-size") - ? "reqd_work_group_size" - : "work_group_size_hint"; - return std::pair(MDName, MDNode::get(Ctx, MDVals)); + return std::pair(MDStr, MDNode::get(Ctx, MDVals)); } if (AttrKindStr == "sycl-sub-group-size") { diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc index 878f0862ac990..dba3b3db03214 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc @@ -59,6 +59,7 @@ Joe Garvey, Intel + Greg Lueck, Intel + John Pennycook, Intel + Roland Schulz, Intel +Fraser Cormack, Codeplay == Overview @@ -116,10 +117,14 @@ supports. === Kernel Properties -The kernel properties below correspond to kernel attributes defined in +Most of the kernel properties below correspond to kernel attributes defined in Section 5.8.1 of the SYCL 2020 specification. Note that deprecated attributes (such as `vec_type_hint`) are not included. +The `max_work_group_size` and `max_total_work_group_size` kernel properties are +also provided as complements to other properties concerning work-group sizes, +without a corresponding function attribute form. + ```c++ namespace sycl { namespace ext { @@ -138,6 +143,17 @@ struct work_group_size_hint_key { using value_t = property_value...>; }; // work_group_size_hint_key +// Corresponds to max_work_group_size +struct max_work_group_size_key { + template + using value_t = property_value...>; +}; // max_work_group_size_key + +struct max_total_work_group_size_key { + template + using value_t = property_value>; +}; // max_total_work_group_size_key + // Corresponds to reqd_sub_group_size struct sub_group_size_key { template @@ -174,6 +190,12 @@ inline constexpr work_group_size_key::value_t work_group_size; template inline constexpr work_group_size_hint_key::value_t work_group_size_hint; +template +inline constexpr max_work_group_size_key::value_t max_work_group_size; + +template +inline constexpr max_total_work_group_size_key::value_t max_total_work_group_size; + template inline constexpr sub_group_size_key::value_t sub_group_size; @@ -182,6 +204,8 @@ inline constexpr device_has_key::value_t device_has; template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; +template <> struct is_property_key : std::true_type {}; +template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; @@ -211,6 +235,20 @@ template <> struct is_property_key : std::true_type {}; of the work-group used to invoke the kernel. The order of the template arguments matches the constructor of the `range` class. +|`max_work_group_size` +|The `max_total_work_group_size` property provides a promise to the compiler +that the kernel will never be launched with a larger work-group than the +specified size. The number of template arguments in the `Dims` parameter pack +must match the dimensionality of the work-group used to invoke the kernel. The +order of the template arguments matches the constructor of the `range` class. + +|`max_total_work_group_size` +|The `max_total_work_group_size` property provides a promise to the compiler +that the kernel will never be launched with a work-group with a larger combined +size the specified amount. The combined work-group size of work-items is +determined as the multiplicative product of the work-group size across all +dimensions of the work-group. + |`sub_group_size` |The `sub_group_size` property adds the requirement that the kernel must be compiled and executed with the specified sub-group size. An implementation may diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index e46ab88c43172..e1d206a7e223e 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -71,6 +71,21 @@ struct single_task_kernel_key { using value_t = property_value; }; +struct max_work_group_size_key + : detail::compile_time_property_key { + template + using value_t = property_value...>; +}; + +struct max_total_work_group_size_key + : detail::compile_time_property_key< + detail::PropKind::MaxTotalWorkGroupSize> { + template + using value_t = property_value>; +}; + template struct property_value, std::integral_constant...> { @@ -138,6 +153,28 @@ template <> struct property_value { using key_t = single_task_kernel_key; }; +template +struct property_value, + std::integral_constant...> { + static_assert(sizeof...(Dims) + 1 <= 3, + "max_work_group_size property currently " + "only supports up to three values."); + static_assert( + detail::AllNonZero::value, + "max_work_group_size property must only contain non-zero values."); + + using key_t = max_work_group_size_key; + + constexpr size_t operator[](int Dim) const { + return std::array{Dim0, Dims...}[Dim]; + } +}; + +template <> struct property_value { + using key_t = max_total_work_group_size_key; +}; + template inline constexpr work_group_size_key::value_t work_group_size; @@ -156,6 +193,14 @@ inline constexpr nd_range_kernel_key::value_t nd_range_kernel; inline constexpr single_task_kernel_key::value_t single_task_kernel; +template +inline constexpr max_work_group_size_key::value_t + max_work_group_size; + +template +inline constexpr max_total_work_group_size_key::value_t + max_total_work_group_size; + struct work_group_progress_key : detail::compile_time_property_key { template struct PropertyMetaInfo { static constexpr const char *name = "sycl-single-task-kernel"; static constexpr int value = 0; }; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-max-work-group-size"; + static constexpr const char *value = SizeListToStr::value; +}; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-max-total-work-group-size"; + static constexpr size_t value = Size; +}; template struct HasKernelPropertiesGetMethod : std::false_type {}; diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index e225928c4cd68..9b24ffefe88a0 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -212,8 +212,10 @@ enum PropKind : uint32_t { IncludeFiles = 71, RegisteredKernelNames = 72, ClusterLaunch = 73, + MaxWorkGroupSize = 74, + MaxTotalWorkGroupSize = 75, // PropKindSize must always be the last value. - PropKindSize = 74, + PropKindSize = 76, }; struct property_key_base_tag {}; diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp new file mode 100644 index 0000000000000..1efa8375b662e --- /dev/null +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp @@ -0,0 +1,20 @@ +// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics + +#include + +int main() { + sycl::queue Q; + + constexpr auto Props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_total_work_group_size<4>, + }; + // CHECK-IR: spir_kernel void @{{.*}}LaunchBoundsKernel(){{.*}} #[[LaunchBoundsAttrs:[0-9]+]] + Q.single_task(Props, []() {}); + + return 0; +} + +// CHECK-IR: attributes #[[LaunchBoundsAttrs]] = { +// CHECK-IR-SAME: "sycl-max-total-work-group-size"="4" diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp new file mode 100644 index 0000000000000..18d5d94538133 --- /dev/null +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp @@ -0,0 +1,24 @@ +// REQUIRES: cuda + +// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR + +#include + +int main() { + sycl::queue Q; + + constexpr auto Props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_total_work_group_size<4>, + }; + + // CHECK-IR: define{{.*}}void @[[LaunchBoundsKernelFn:.*LaunchBoundsKernel0]](){{.*}} #[[LaunchBoundsAttrs:[0-9]+]] + Q.single_task(Props, []() {}); + + return 0; +} + +// CHECK-IR: attributes #[[LaunchBoundsAttrs]] = { +// CHECK-IR-SAME: "sycl-max-total-work-group-size"="4" + +// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"kernel", i32 1} +// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"maxntidx", i32 4} diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp new file mode 100644 index 0000000000000..6126cf197819d --- /dev/null +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp @@ -0,0 +1,37 @@ +// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics + +#include + +int main() { + sycl::queue Q; + sycl::event Ev; + + constexpr auto Props1 = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size<8>}; + constexpr auto Props2 = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size<8, 4>}; + constexpr auto Props3 = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size<8, 4, 2>}; + + // CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel0(){{.*}} #[[MaxWGSizeAttr0:[0-9]+]] + // CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD0:[0-9]+]] + Q.single_task(Props1, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel1(){{.*}} #[[MaxWGSizeAttr1:[0-9]+]] + // CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD1:[0-9]+]] + Q.single_task(Ev, Props2, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel2(){{.*}} #[[MaxWGSizeAttr2:[0-9]+]] + // CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD2:[0-9]+]] + Q.single_task({Ev}, Props3, []() {}); + + return 0; +} + +// CHECK-IR: attributes #[[MaxWGSizeAttr0]] = { {{.*}}"sycl-max-work-group-size"="8" +// CHECK-IR: attributes #[[MaxWGSizeAttr1]] = { {{.*}}"sycl-max-work-group-size"="8,4" +// CHECK-IR: attributes #[[MaxWGSizeAttr2]] = { {{.*}}"sycl-max-work-group-size"="8,4,2" + +// CHECK-IR: ![[MaxWGSizeMD0]] = !{i64 8} +// CHECK-IR: ![[MaxWGSizeMD1]] = !{i64 4, i64 8} +// CHECK-IR: ![[MaxWGSizeMD2]] = !{i64 2, i64 4, i64 8} diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp new file mode 100644 index 0000000000000..449310e51fb5f --- /dev/null +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp @@ -0,0 +1,51 @@ +// REQUIRES: cuda + +// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR + +#include + +int main() { + sycl::queue Q; + + constexpr auto Props1 = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size<8>}; + constexpr auto Props2 = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size<8, 4>}; + constexpr auto Props3 = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size<8, 4, 2>}; + + // CHECK-IR: define{{.*}}void @[[MaxWGSizeKernelFn0:.*MaxWGSizeKernel0]](){{.*}} #[[MaxWGSizeAttr0:[0-9]+]] + Q.single_task(Props1, []() {}); + + // CHECK-IR: define{{.*}}void @[[MaxWGSizeKernelFn1:.*MaxWGSizeKernel1]](){{.*}} #[[MaxWGSizeAttr1:[0-9]+]] + Q.single_task(Props2, []() {}); + + // CHECK-IR: define{{.*}}void @[[MaxWGSizeKernelFn2:.*MaxWGSizeKernel2]](){{.*}} #[[MaxWGSizeAttr2:[0-9]+]] + Q.single_task(Props3, []() {}); + + return 0; +} + +// CHECK-IR: attributes #[[MaxWGSizeAttr0]] = { +// CHECK-IR-SAME: "sycl-max-work-group-size"="8" + +// CHECK-IR: attributes #[[MaxWGSizeAttr1]] = { +// CHECK-IR-SAME: "sycl-max-work-group-size"="8,4" + +// CHECK-IR: attributes #[[MaxWGSizeAttr2]] = { +// CHECK-IR-SAME: "sycl-max-work-group-size"="8,4,2" + +// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn0]], !"kernel", i32 1} +// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn0]], !"maxntidx", i32 8} +// CHECK-IR-NOT: !{ptr @[[MaxWGSizeKernelFn0]], !"maxntidy", +// CHECK-IR-NOT: !{ptr @[[MaxWGSizeKernelFn0]], !"maxntidz", + +// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn1]], !"kernel", i32 1} +// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn1]], !"maxntidx", i32 4} +// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn1]], !"maxntidy", i32 8} +// CHECK-IR-NOT: !{ptr @[[MaxWGSizeKernelFn1]], !"maxntidz", + +// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn2]], !"kernel", i32 1} +// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn2]], !"maxntidx", i32 2} +// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn2]], !"maxntidy", i32 4} +// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn2]], !"maxntidz", i32 8} diff --git a/sycl/test/extensions/properties/properties_kernel.cpp b/sycl/test/extensions/properties/properties_kernel.cpp index 3868c23f7535c..4539b44d08305 100644 --- a/sycl/test/extensions/properties/properties_kernel.cpp +++ b/sycl/test/extensions/properties/properties_kernel.cpp @@ -50,6 +50,12 @@ int main() { static_assert( is_property_value)>::value); static_assert(is_property_value)>::value); + static_assert(is_property_value)>::value); + static_assert(is_property_value)>::value); + static_assert( + is_property_value)>::value); + static_assert( + is_property_value)>::value); static_assert( std::is_same_v)::key_t>); @@ -66,6 +72,15 @@ int main() { decltype(work_group_size_hint<13, 13, 13>)::key_t>); static_assert( std::is_same_v)::key_t>); + static_assert(std::is_same_v)::key_t>); + static_assert(std::is_same_v)::key_t>); + static_assert( + std::is_same_v)::key_t>); + static_assert(std::is_same_v)::key_t>); static_assert(work_group_size<15>[0] == 15); static_assert(work_group_size<16, 17>[0] == 16); @@ -80,6 +95,13 @@ int main() { static_assert(work_group_size_hint<24, 25, 26>[1] == 25); static_assert(work_group_size_hint<24, 25, 26>[2] == 26); static_assert(sub_group_size<27>.value == 27); + static_assert(max_work_group_size<28>[0] == 28); + static_assert(max_work_group_size<28, 29>[0] == 28); + static_assert(max_work_group_size<28, 29>[1] == 29); + static_assert(max_work_group_size<28, 29, 30>[0] == 28); + static_assert(max_work_group_size<28, 29, 30>[1] == 29); + static_assert(max_work_group_size<28, 29, 30>[2] == 30); + static_assert(max_total_work_group_size<28>.value == 28); static_assert(std::is_same_v)::value_t, std::integral_constant>); From b4d3bf13007b3dfe6605dc91974dfeda77f666b9 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 10 Jul 2024 16:42:30 +0100 Subject: [PATCH 02/17] feedback: total -> linear --- clang/lib/CodeGen/Targets/NVPTX.cpp | 2 +- ...sycl_ext_oneapi_kernel_properties.asciidoc | 30 +++++++++---------- .../oneapi/kernel_properties/properties.hpp | 18 +++++------ .../sycl/ext/oneapi/properties/property.hpp | 2 +- .../properties_kernel_launch_bounds.cpp | 4 +-- .../properties_kernel_launch_bounds_nvptx.cpp | 4 +-- .../properties/properties_kernel.cpp | 9 +++--- 7 files changed, 34 insertions(+), 35 deletions(-) diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 5ce4e0ae63ff6..b7aba95ea4801 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -343,7 +343,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( } } - if (auto Attr = F->getFnAttribute("sycl-max-total-work-group-size"); + if (auto Attr = F->getFnAttribute("sycl-max-linear-work-group-size"); Attr.isValid()) { size_t Value = 0; bool Error = Attr.getValueAsString().getAsInteger(10, Value); diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc index dba3b3db03214..d06c4ca233fc5 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc @@ -121,9 +121,9 @@ Most of the kernel properties below correspond to kernel attributes defined in Section 5.8.1 of the SYCL 2020 specification. Note that deprecated attributes (such as `vec_type_hint`) are not included. -The `max_work_group_size` and `max_total_work_group_size` kernel properties are -also provided as complements to other properties concerning work-group sizes, -without a corresponding function attribute form. +The `max_work_group_size` and `max_linear_work_group_size` kernel properties +are also provided as complements to other properties concerning work-group +sizes, without a corresponding function attribute form. ```c++ namespace sycl { @@ -149,10 +149,10 @@ struct max_work_group_size_key { using value_t = property_value...>; }; // max_work_group_size_key -struct max_total_work_group_size_key { - template - using value_t = property_value>; -}; // max_total_work_group_size_key +struct max_linear_work_group_size_key { + template + using value_t = property_value>; +}; // max_linear_work_group_size_key // Corresponds to reqd_sub_group_size struct sub_group_size_key { @@ -194,7 +194,7 @@ template inline constexpr max_work_group_size_key::value_t max_work_group_size; template -inline constexpr max_total_work_group_size_key::value_t max_total_work_group_size; +inline constexpr max_linear_work_group_size_key::value_t max_linear_work_group_size; template inline constexpr sub_group_size_key::value_t sub_group_size; @@ -205,7 +205,7 @@ inline constexpr device_has_key::value_t device_has; template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; -template <> struct is_property_key : std::true_type {}; +template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; @@ -236,18 +236,16 @@ template <> struct is_property_key : std::true_type {}; arguments matches the constructor of the `range` class. |`max_work_group_size` -|The `max_total_work_group_size` property provides a promise to the compiler +|The `max_work_group_size` property provides a promise to the compiler that the kernel will never be launched with a larger work-group than the specified size. The number of template arguments in the `Dims` parameter pack must match the dimensionality of the work-group used to invoke the kernel. The order of the template arguments matches the constructor of the `range` class. -|`max_total_work_group_size` -|The `max_total_work_group_size` property provides a promise to the compiler -that the kernel will never be launched with a work-group with a larger combined -size the specified amount. The combined work-group size of work-items is -determined as the multiplicative product of the work-group size across all -dimensions of the work-group. +|`max_linear_work_group_size` +|The `max_linear_work_group_size` property provides a promise to the compiler +that the kernel will never be launched with a work-group for which the return +value of `group::get_local_linear_range()` exceeds the specified amount. |`sub_group_size` |The `sub_group_size` property adds the requirement that the kernel must be diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index e1d206a7e223e..a801763534ed0 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -78,11 +78,11 @@ struct max_work_group_size_key std::integral_constant...>; }; -struct max_total_work_group_size_key +struct max_linear_work_group_size_key : detail::compile_time_property_key< - detail::PropKind::MaxTotalWorkGroupSize> { + detail::PropKind::MaxLinearWorkGroupSize> { template - using value_t = property_value>; }; @@ -171,8 +171,8 @@ struct property_value struct property_value { - using key_t = max_total_work_group_size_key; +template <> struct property_value { + using key_t = max_linear_work_group_size_key; }; template @@ -198,8 +198,8 @@ inline constexpr max_work_group_size_key::value_t max_work_group_size; template -inline constexpr max_total_work_group_size_key::value_t - max_total_work_group_size; +inline constexpr max_linear_work_group_size_key::value_t + max_linear_work_group_size; struct work_group_progress_key : detail::compile_time_property_key { @@ -321,8 +321,8 @@ struct PropertyMetaInfo> { static constexpr const char *value = SizeListToStr::value; }; template -struct PropertyMetaInfo> { - static constexpr const char *name = "sycl-max-total-work-group-size"; +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-max-linear-work-group-size"; static constexpr size_t value = Size; }; diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 9b24ffefe88a0..ef431fb2fe7ea 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -213,7 +213,7 @@ enum PropKind : uint32_t { RegisteredKernelNames = 72, ClusterLaunch = 73, MaxWorkGroupSize = 74, - MaxTotalWorkGroupSize = 75, + MaxLinearWorkGroupSize = 75, // PropKindSize must always be the last value. PropKindSize = 76, }; diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp index 1efa8375b662e..96ac3da42a504 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp @@ -8,7 +8,7 @@ int main() { sycl::queue Q; constexpr auto Props = sycl::ext::oneapi::experimental::properties{ - sycl::ext::oneapi::experimental::max_total_work_group_size<4>, + sycl::ext::oneapi::experimental::max_linear_work_group_size<4>, }; // CHECK-IR: spir_kernel void @{{.*}}LaunchBoundsKernel(){{.*}} #[[LaunchBoundsAttrs:[0-9]+]] Q.single_task(Props, []() {}); @@ -17,4 +17,4 @@ int main() { } // CHECK-IR: attributes #[[LaunchBoundsAttrs]] = { -// CHECK-IR-SAME: "sycl-max-total-work-group-size"="4" +// CHECK-IR-SAME: "sycl-max-linear-work-group-size"="4" diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp index 18d5d94538133..2040c5418f7c6 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp @@ -8,7 +8,7 @@ int main() { sycl::queue Q; constexpr auto Props = sycl::ext::oneapi::experimental::properties{ - sycl::ext::oneapi::experimental::max_total_work_group_size<4>, + sycl::ext::oneapi::experimental::max_linear_work_group_size<4>, }; // CHECK-IR: define{{.*}}void @[[LaunchBoundsKernelFn:.*LaunchBoundsKernel0]](){{.*}} #[[LaunchBoundsAttrs:[0-9]+]] @@ -18,7 +18,7 @@ int main() { } // CHECK-IR: attributes #[[LaunchBoundsAttrs]] = { -// CHECK-IR-SAME: "sycl-max-total-work-group-size"="4" +// CHECK-IR-SAME: "sycl-max-linear-work-group-size"="4" // CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"kernel", i32 1} // CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"maxntidx", i32 4} diff --git a/sycl/test/extensions/properties/properties_kernel.cpp b/sycl/test/extensions/properties/properties_kernel.cpp index 4539b44d08305..febb1849e331c 100644 --- a/sycl/test/extensions/properties/properties_kernel.cpp +++ b/sycl/test/extensions/properties/properties_kernel.cpp @@ -55,7 +55,7 @@ int main() { static_assert( is_property_value)>::value); static_assert( - is_property_value)>::value); + is_property_value)>::value); static_assert( std::is_same_v)::key_t>); @@ -79,8 +79,9 @@ int main() { static_assert( std::is_same_v)::key_t>); - static_assert(std::is_same_v)::key_t>); + static_assert( + std::is_same_v)::key_t>); static_assert(work_group_size<15>[0] == 15); static_assert(work_group_size<16, 17>[0] == 16); @@ -101,7 +102,7 @@ int main() { static_assert(max_work_group_size<28, 29, 30>[0] == 28); static_assert(max_work_group_size<28, 29, 30>[1] == 29); static_assert(max_work_group_size<28, 29, 30>[2] == 30); - static_assert(max_total_work_group_size<28>.value == 28); + static_assert(max_linear_work_group_size<28>.value == 28); static_assert(std::is_same_v)::value_t, std::integral_constant>); From a9b43f2973c200d70a0b3f3294fc96b830430390 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 11 Jul 2024 10:33:47 +0100 Subject: [PATCH 03/17] Update sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc Co-authored-by: Greg Lueck --- .../experimental/sycl_ext_oneapi_kernel_properties.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc index d06c4ca233fc5..4a75cbbe2af75 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc @@ -58,7 +58,7 @@ Jessica Davies, Intel + Joe Garvey, Intel + Greg Lueck, Intel + John Pennycook, Intel + -Roland Schulz, Intel +Roland Schulz, Intel + Fraser Cormack, Codeplay == Overview From f0ab74cd9d79a3e65a6f083388206ef04b3fee0b Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 16 Jul 2024 19:59:05 +0100 Subject: [PATCH 04/17] feedback: maybe_unused; delete comment; update spec for exception wording --- clang/lib/CodeGen/Targets/NVPTX.cpp | 5 ++--- .../sycl_ext_oneapi_kernel_properties.asciidoc | 9 ++++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 930e3f38f883d..2bcfda81e5e76 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -346,11 +346,10 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( if (auto Attr = F->getFnAttribute("sycl-max-linear-work-group-size"); Attr.isValid()) { size_t Value = 0; - bool Error = Attr.getValueAsString().getAsInteger(10, Value); + [[maybe_unused]] bool Error = Attr.getValueAsString().getAsInteger(10, Value); assert(!Error && "The attribute's value is not a number"); - if (llvm::isUInt<31>(Value)) { + if (llvm::isUInt<31>(Value)) addNVVMMetadata(F, "maxntidx", Value); - } } if (const auto *RWGS = FD->getAttr()) { diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc index 4a75cbbe2af75..3847eb8671ef6 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc @@ -143,7 +143,6 @@ struct work_group_size_hint_key { using value_t = property_value...>; }; // work_group_size_hint_key -// Corresponds to max_work_group_size struct max_work_group_size_key { template using value_t = property_value...>; @@ -242,11 +241,19 @@ specified size. The number of template arguments in the `Dims` parameter pack must match the dimensionality of the work-group used to invoke the kernel. The order of the template arguments matches the constructor of the `range` class. +If the kernel is submitted with an `nd_range` that exceeds the size specified +by the property, the implementation must throw a synchronous exception with the +`errc::nd_range` error code. + |`max_linear_work_group_size` |The `max_linear_work_group_size` property provides a promise to the compiler that the kernel will never be launched with a work-group for which the return value of `group::get_local_linear_range()` exceeds the specified amount. +If the kernel is submitted with an `nd_range` that exceeds the size specified +by the property, the implementation must throw a synchronous exception with the +`errc::nd_range` error code. + |`sub_group_size` |The `sub_group_size` property adds the requirement that the kernel must be compiled and executed with the specified sub-group size. An implementation may From d6d2892fb45345991f2945143651993c6c509eac Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 18 Jul 2024 14:28:04 +0100 Subject: [PATCH 05/17] update llvm-spirv --- llvm-spirv/lib/SPIRV/PreprocessMetadata.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/PreprocessMetadata.cpp b/llvm-spirv/lib/SPIRV/PreprocessMetadata.cpp index 8a977513ceaba..a761bfbe4f435 100644 --- a/llvm-spirv/lib/SPIRV/PreprocessMetadata.cpp +++ b/llvm-spirv/lib/SPIRV/PreprocessMetadata.cpp @@ -187,16 +187,17 @@ void PreprocessMetadataBase::visit(Module *M) { // i32 Y, i32 Z} if (MDNode *MaxWorkgroupSizeINTEL = Kernel.getMetadata(kSPIR2MD::MaxWGSize)) { - assert(MaxWorkgroupSizeINTEL->getNumOperands() == 3 && - "max_work_group_size does not have 3 operands."); + assert(MaxWorkgroupSizeINTEL->getNumOperands() >= 1 && + MaxWorkgroupSizeINTEL->getNumOperands() <= 3 && + "max_work_group_size does not have between 1 and 3 operands."); SmallVector DecodedVals = decodeMDNode(MaxWorkgroupSizeINTEL); EM.addOp() .add(&Kernel) .add(spv::ExecutionModeMaxWorkgroupSizeINTEL) .add(DecodedVals[0]) - .add(DecodedVals[1]) - .add(DecodedVals[2]) + .add(DecodedVals.size() >= 2 ? DecodedVals[1] : 1) + .add(DecodedVals.size() == 3 ? DecodedVals[2] : 1) .done(); } From 6ee833c851e890c671f2972392ddf9d57b246509 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 5 Aug 2024 18:39:26 +0100 Subject: [PATCH 06/17] update tests --- .../lib/SYCLLowerIR/CompileTimePropertiesPass.cpp | 15 +++++++++++++++ .../lib/SYCLLowerIR/SYCLCreateNVVMAnnotations.cpp | 3 ++- .../properties_kernel_max_work_group_size.cpp | 4 ++-- ...roperties_kernel_max_work_group_size_nvptx.cpp | 5 +++-- 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index 921b4bc8f61cd..15c0fb78ccaaa 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -416,6 +416,21 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { MDNode::get(Ctx, MD)); } + if (AttrKindStr == "sycl-max-linear-work-group-size") { + auto MaxLinearSize = getAttributeAsInteger(Attr); + // Use integer pointer size as closest analogue to size_t. + IntegerType *IntPtrTy = DLayout.getIntPtrType(Ctx); + IntegerType *SizeTTy = Type::getIntNTy(Ctx, IntPtrTy->getBitWidth()); + unsigned SizeTBitSize = SizeTTy->getBitWidth(); + + // Get the integers from the strings. + Metadata *MD = ConstantAsMetadata::get(Constant::getIntegerValue( + SizeTTy, APInt(SizeTBitSize, MaxLinearSize, 10))); + + return std::pair("max_linear_work_group_size", + MDNode::get(Ctx, MD)); + } + // The sycl-single-task attribute currently only has an effect when targeting // SPIR FPGAs, in which case it will generate a "max_global_work_dim" MD node // with a 0 value, similar to applying [[intel::max_global_work_dim(0)]] to diff --git a/llvm/lib/SYCLLowerIR/SYCLCreateNVVMAnnotations.cpp b/llvm/lib/SYCLLowerIR/SYCLCreateNVVMAnnotations.cpp index 5c718716c7414..635653d53114d 100644 --- a/llvm/lib/SYCLLowerIR/SYCLCreateNVVMAnnotations.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLCreateNVVMAnnotations.cpp @@ -96,7 +96,8 @@ SYCLCreateNVVMAnnotationsPass::run(Module &M, ModuleAnalysisManager &MAM) { constexpr static std::pair SingleValAnnotations[] = {{"min_work_groups_per_cu", "minctasm"}, - {"max_work_groups_per_mp", "maxclusterrank"}}; + {"max_work_groups_per_mp", "maxclusterrank"}, + {"max_linear_work_group_size", "maxntidx"}}; for (auto &[MDName, AnnotationName] : SingleValAnnotations) { if (MDNode *Node = F.getMetadata(MDName)) { diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp index 6126cf197819d..924270bb6cafe 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp @@ -32,6 +32,6 @@ int main() { // CHECK-IR: attributes #[[MaxWGSizeAttr1]] = { {{.*}}"sycl-max-work-group-size"="8,4" // CHECK-IR: attributes #[[MaxWGSizeAttr2]] = { {{.*}}"sycl-max-work-group-size"="8,4,2" -// CHECK-IR: ![[MaxWGSizeMD0]] = !{i64 8} -// CHECK-IR: ![[MaxWGSizeMD1]] = !{i64 4, i64 8} +// CHECK-IR: ![[MaxWGSizeMD0]] = !{i64 8, i64 1, i64 1} +// CHECK-IR: ![[MaxWGSizeMD1]] = !{i64 4, i64 8, i64 1} // CHECK-IR: ![[MaxWGSizeMD2]] = !{i64 2, i64 4, i64 8} diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp index 449310e51fb5f..af20bb82650fa 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp @@ -36,16 +36,17 @@ int main() { // CHECK-IR-SAME: "sycl-max-work-group-size"="8,4,2" // CHECK-IR: !{ptr @[[MaxWGSizeKernelFn0]], !"kernel", i32 1} +// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn1]], !"kernel", i32 1} +// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn2]], !"kernel", i32 1} + // CHECK-IR: !{ptr @[[MaxWGSizeKernelFn0]], !"maxntidx", i32 8} // CHECK-IR-NOT: !{ptr @[[MaxWGSizeKernelFn0]], !"maxntidy", // CHECK-IR-NOT: !{ptr @[[MaxWGSizeKernelFn0]], !"maxntidz", -// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn1]], !"kernel", i32 1} // CHECK-IR: !{ptr @[[MaxWGSizeKernelFn1]], !"maxntidx", i32 4} // CHECK-IR: !{ptr @[[MaxWGSizeKernelFn1]], !"maxntidy", i32 8} // CHECK-IR-NOT: !{ptr @[[MaxWGSizeKernelFn1]], !"maxntidz", -// CHECK-IR: !{ptr @[[MaxWGSizeKernelFn2]], !"kernel", i32 1} // CHECK-IR: !{ptr @[[MaxWGSizeKernelFn2]], !"maxntidx", i32 2} // CHECK-IR: !{ptr @[[MaxWGSizeKernelFn2]], !"maxntidy", i32 4} // CHECK-IR: !{ptr @[[MaxWGSizeKernelFn2]], !"maxntidz", i32 8} From 7722aacc09df564426da7e76611ed7d8fa167056 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 6 Aug 2024 14:51:55 +0100 Subject: [PATCH 07/17] emit to program metadata; add tests --- .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 73 ++++--- .../max_linear_work_group_size_props.cpp | 189 +++++++++++++++++ .../Basic/max_work_group_size_props.cpp | 198 ++++++++++++++++++ 3 files changed, 436 insertions(+), 24 deletions(-) create mode 100644 sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp create mode 100644 sycl/test-e2e/Basic/max_work_group_size_props.cpp diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index 0c00134a2effb..3e083ec5b24d9 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -107,28 +107,35 @@ std::vector getKernelNamesUsingAssert(const Module &M) { return SPIRKernelNames; } -// Gets reqd_work_group_size information for function Func. -std::vector getKernelReqdWorkGroupSizeMetadata(const Function &Func) { - MDNode *ReqdWorkGroupSizeMD = Func.getMetadata("reqd_work_group_size"); - if (!ReqdWorkGroupSizeMD) +// Gets 1- to 3-dimension work-group related information for function Func. +// Returns an empty vector if not present. +template +std::vector getKernelWorkGroupMetadata(const Function &Func, + const char *MDName) { + MDNode *WorkGroupMD = Func.getMetadata(MDName); + if (!WorkGroupMD) return {}; - size_t NumOperands = ReqdWorkGroupSizeMD->getNumOperands(); + size_t NumOperands = WorkGroupMD->getNumOperands(); assert(NumOperands >= 1 && NumOperands <= 3 && - "reqd_work_group_size does not have between 1 and 3 operands."); - std::vector OutVals; + "work-group metadata does not have between 1 and 3 operands."); + std::vector OutVals; OutVals.reserve(NumOperands); - for (const MDOperand &MDOp : ReqdWorkGroupSizeMD->operands()) + for (const MDOperand &MDOp : WorkGroupMD->operands()) OutVals.push_back(mdconst::extract(MDOp)->getZExtValue()); return OutVals; } -// Gets work_group_num_dim information for function Func, conviniently 0 if -// metadata is not present. -uint32_t getKernelWorkGroupNumDim(const Function &Func) { - MDNode *MaxDimMD = Func.getMetadata("work_group_num_dim"); - if (!MaxDimMD) - return 0; - assert(MaxDimMD->getNumOperands() == 1 && "Malformed node."); - return mdconst::extract(MaxDimMD->getOperand(0))->getZExtValue(); + +// Gets a single-dimensional piece of information for function Func. +// Returns std::nullopt if metadata is not present. +template +std::optional getKernelSingleEltMetadata(const Function &Func, + const char *MDName) { + if (MDNode *MaxDimMD = Func.getMetadata(MDName)) { + assert(MaxDimMD->getNumOperands() == 1 && "Malformed node."); + return mdconst::extract(MaxDimMD->getOperand(0)) + ->getZExtValue(); + } + return std::nullopt; } PropSetRegTy computeModuleProperties(const Module &M, @@ -235,22 +242,40 @@ PropSetRegTy computeModuleProperties(const Module &M, SmallVector MetadataNames; if (GlobProps.EmitProgramMetadata) { - // Add reqd_work_group_size and work_group_num_dim information to - // program metadata. + // Add various pieces of function metadata to program metadata. for (const Function &Func : M.functions()) { - std::vector KernelReqdWorkGroupSize = - getKernelReqdWorkGroupSizeMetadata(Func); - if (!KernelReqdWorkGroupSize.empty()) { + // Note - we're implicitly truncating 64-bit work-group data to 32 bits in + // all work-group related metadata. All current consumers of this program + // metadata format only support SYCL ID queries that fit within MAX_INT. + if (auto KernelReqdWorkGroupSize = getKernelWorkGroupMetadata( + Func, "reqd_work_group_size"); + !KernelReqdWorkGroupSize.empty()) { MetadataNames.push_back(Func.getName().str() + "@reqd_work_group_size"); PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), KernelReqdWorkGroupSize); } - uint32_t WorkGroupNumDim = getKernelWorkGroupNumDim(Func); - if (WorkGroupNumDim) { + if (auto WorkGroupNumDim = getKernelSingleEltMetadata( + Func, "work_group_num_dim")) { MetadataNames.push_back(Func.getName().str() + "@work_group_num_dim"); PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), - WorkGroupNumDim); + *WorkGroupNumDim); + } + + if (auto KernelMaxWorkGroupSize = + getKernelWorkGroupMetadata(Func, "max_work_group_size"); + !KernelMaxWorkGroupSize.empty()) { + MetadataNames.push_back(Func.getName().str() + "@max_work_group_size"); + PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), + KernelMaxWorkGroupSize); + } + + if (auto MaxLinearWGSize = getKernelSingleEltMetadata( + Func, "max_linear_work_group_size")) { + MetadataNames.push_back(Func.getName().str() + + "@max_linear_work_group_size"); + PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), + *MaxLinearWGSize); } } diff --git a/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp b/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp new file mode 100644 index 0000000000000..dac8dc3f1b4a0 --- /dev/null +++ b/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp @@ -0,0 +1,189 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include + +#include + +using namespace sycl; + +enum class Variant { Function, Functor, FunctorAndProperty }; + +template +class ReqdWGSizePositiveA; +template +class ReqdWGSizeNoLocalPositive; +template +class ReqdWGSizeNegativeA; + +template range repeatRange(size_t Val); +template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } +template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } +template <> range<3> repeatRange<3>(size_t Val) { + return range<3>{Val, Val, Val}; +} + +std::string rangeToString(range<1> Range) { + return "{1, 1, " + std::to_string(Range[0]) + "}"; +} +std::string rangeToString(range<2> Range) { + return "{1, " + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + "}"; +} +std::string rangeToString(range<3> Range) { + return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + ", " + std::to_string(Range[2]) + "}"; +} + +template struct KernelFunctorWithMaxWGSizeProp { + void operator()(nd_item<1>) const {} + void operator()(item<1>) const {} + + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_linear_work_group_size}; + } +}; + +template +int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { + constexpr size_t Dims = 1; + + bool IsOpenCL = (Q.get_backend() == backend::opencl); + + // Positive test case: Specify local size that matches required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(8), range(I)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(8), range(I)), Props, + KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + // Kernel that has a required WG size, but no local size is specified. + // + // TODO: This fails on OpenCL and should be investigated. + if (!IsOpenCL) { + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for< + ReqdWGSizeNoLocalPositive>( + repeatRange(16), Props, KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + try { + Q.parallel_for>( + repeatRange(16), Props, KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " + "unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Negative test case: Specify local size that does not match required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(16), repeatRange(2)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(I))) == std::string::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(16), repeatRange(8)), Props, + KernelFunc); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " + "has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(I))) == std::string::npos) { + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + } + + return 0; +} + +template int test_max(queue &Q) { + auto Props = ext::oneapi::experimental::properties{ + ext::oneapi::experimental::max_linear_work_group_size}; + auto KernelFunction = [](auto) {}; + + auto EmptyProps = ext::oneapi::experimental::properties{}; + KernelFunctorWithMaxWGSizeProp KernelFunctor; + + int Res = 0; + Res += test(Q, Props, KernelFunction); + Res += test(Q, EmptyProps, KernelFunctor); + Res += test(Q, Props, KernelFunctor); + return Res; +} + +int main() { + auto AsyncHandler = [](exception_list ES) { + for (auto &E : ES) { + std::rethrow_exception(E); + } + }; + + queue Q(AsyncHandler); + + return test_max<4>(Q); +} diff --git a/sycl/test-e2e/Basic/max_work_group_size_props.cpp b/sycl/test-e2e/Basic/max_work_group_size_props.cpp new file mode 100644 index 0000000000000..bfc8fef1df916 --- /dev/null +++ b/sycl/test-e2e/Basic/max_work_group_size_props.cpp @@ -0,0 +1,198 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include + +#include + +using namespace sycl; + +enum class Variant { Function, Functor, FunctorAndProperty }; + +template +class ReqdWGSizePositiveA; +template +class ReqdWGSizeNoLocalPositive; +template +class ReqdWGSizeNegativeA; + +template range repeatRange(size_t Val); +template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } +template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } +template <> range<3> repeatRange<3>(size_t Val) { + return range<3>{Val, Val, Val}; +} + +std::string rangeToString(range<1> Range) { + return "{1, 1, " + std::to_string(Range[0]) + "}"; +} +std::string rangeToString(range<2> Range) { + return "{1, " + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + "}"; +} +std::string rangeToString(range<3> Range) { + return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + ", " + std::to_string(Range[2]) + "}"; +} + +template struct KernelFunctorWithMaxWGSizeProp { + void operator()(nd_item) const {} + void operator()(item) const {} + + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size}; + } +}; + +template +int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { + constexpr size_t Dims = sizeof...(Is); + + bool IsOpenCL = (Q.get_backend() == backend::opencl); + + // Positive test case: Specify local size that matches required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + // Kernel that has a required WG size, but no local size is specified. + // + // TODO: This fails on OpenCL and should be investigated. + if (!IsOpenCL) { + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for< + ReqdWGSizeNoLocalPositive>( + repeatRange(16), Props, KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + try { + Q.parallel_for>( + repeatRange(16), Props, KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " + "unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Negative test case: Specify local size that does not match required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(16), repeatRange(2)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(Is...))) == std::string::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(16), repeatRange(8)), Props, + KernelFunc); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " + "has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(Is...))) == std::string::npos) { + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + } + + return 0; +} + +template int test_max(queue &Q) { + auto Props = ext::oneapi::experimental::properties{ + ext::oneapi::experimental::max_work_group_size}; + auto KernelFunction = [](auto) {}; + + auto EmptyProps = ext::oneapi::experimental::properties{}; + KernelFunctorWithMaxWGSizeProp KernelFunctor; + + int Res = 0; + Res += test(Q, Props, KernelFunction); + Res += test(Q, EmptyProps, KernelFunctor); + Res += test(Q, Props, KernelFunctor); + return Res; +} + +int main() { + auto AsyncHandler = [](exception_list ES) { + for (auto &E : ES) { + std::rethrow_exception(E); + } + }; + + queue Q(AsyncHandler); + + int Res = 0; + Res += test_max<4>(Q); + Res += test_max<4, 4>(Q); + Res += test_max<8, 4>(Q); + Res += test_max<4, 8>(Q); + Res += test_max<4, 4, 4>(Q); + Res += test_max<4, 4, 8>(Q); + Res += test_max<8, 4, 4>(Q); + Res += test_max<4, 8, 4>(Q); + return Res; +} From 7a084884e0e93448f69a22036b581e6aa6221f0e Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 14 Aug 2024 11:28:27 +0100 Subject: [PATCH 08/17] test different backends --- .../Basic/max_work_group_size_props_acc.cpp | 199 ++++++++++++++++++ .../Basic/max_work_group_size_props_cpu.cpp | 199 ++++++++++++++++++ .../Basic/max_work_group_size_props_gpu.cpp | 199 ++++++++++++++++++ .../Basic/max_work_group_size_props_lzo.cpp | 199 ++++++++++++++++++ .../Basic/max_work_group_size_props_ocl.cpp | 199 ++++++++++++++++++ 5 files changed, 995 insertions(+) create mode 100644 sycl/test-e2e/Basic/max_work_group_size_props_acc.cpp create mode 100644 sycl/test-e2e/Basic/max_work_group_size_props_cpu.cpp create mode 100644 sycl/test-e2e/Basic/max_work_group_size_props_gpu.cpp create mode 100644 sycl/test-e2e/Basic/max_work_group_size_props_lzo.cpp create mode 100644 sycl/test-e2e/Basic/max_work_group_size_props_ocl.cpp diff --git a/sycl/test-e2e/Basic/max_work_group_size_props_acc.cpp b/sycl/test-e2e/Basic/max_work_group_size_props_acc.cpp new file mode 100644 index 0000000000000..a557444a4d593 --- /dev/null +++ b/sycl/test-e2e/Basic/max_work_group_size_props_acc.cpp @@ -0,0 +1,199 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// REQUIRES: accelerator +#include + +#include + +using namespace sycl; + +enum class Variant { Function, Functor, FunctorAndProperty }; + +template +class ReqdWGSizePositiveA; +template +class ReqdWGSizeNoLocalPositive; +template +class ReqdWGSizeNegativeA; + +template range repeatRange(size_t Val); +template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } +template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } +template <> range<3> repeatRange<3>(size_t Val) { + return range<3>{Val, Val, Val}; +} + +std::string rangeToString(range<1> Range) { + return "{1, 1, " + std::to_string(Range[0]) + "}"; +} +std::string rangeToString(range<2> Range) { + return "{1, " + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + "}"; +} +std::string rangeToString(range<3> Range) { + return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + ", " + std::to_string(Range[2]) + "}"; +} + +template struct KernelFunctorWithMaxWGSizeProp { + void operator()(nd_item) const {} + void operator()(item) const {} + + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size}; + } +}; + +template +int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { + constexpr size_t Dims = sizeof...(Is); + + bool IsOpenCL = (Q.get_backend() == backend::opencl); + + // Positive test case: Specify local size that matches required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + // Kernel that has a required WG size, but no local size is specified. + // + // TODO: This fails on OpenCL and should be investigated. + if (!IsOpenCL) { + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for< + ReqdWGSizeNoLocalPositive>( + repeatRange(16), Props, KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + try { + Q.parallel_for>( + repeatRange(16), Props, KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " + "unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Negative test case: Specify local size that does not match required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(16), repeatRange(2)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(Is...))) == std::string::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(16), repeatRange(8)), Props, + KernelFunc); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " + "has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(Is...))) == std::string::npos) { + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + } + + return 0; +} + +template int test_max(queue &Q) { + auto Props = ext::oneapi::experimental::properties{ + ext::oneapi::experimental::max_work_group_size}; + auto KernelFunction = [](auto) {}; + + auto EmptyProps = ext::oneapi::experimental::properties{}; + KernelFunctorWithMaxWGSizeProp KernelFunctor; + + int Res = 0; + Res += test(Q, Props, KernelFunction); + Res += test(Q, EmptyProps, KernelFunctor); + Res += test(Q, Props, KernelFunctor); + return Res; +} + +int main() { + auto AsyncHandler = [](exception_list ES) { + for (auto &E : ES) { + std::rethrow_exception(E); + } + }; + + queue Q(AsyncHandler); + + int Res = 0; + Res += test_max<4>(Q); + Res += test_max<4, 4>(Q); + Res += test_max<8, 4>(Q); + Res += test_max<4, 8>(Q); + Res += test_max<4, 4, 4>(Q); + Res += test_max<4, 4, 8>(Q); + Res += test_max<8, 4, 4>(Q); + Res += test_max<4, 8, 4>(Q); + return Res; +} diff --git a/sycl/test-e2e/Basic/max_work_group_size_props_cpu.cpp b/sycl/test-e2e/Basic/max_work_group_size_props_cpu.cpp new file mode 100644 index 0000000000000..f75f2fdd0b315 --- /dev/null +++ b/sycl/test-e2e/Basic/max_work_group_size_props_cpu.cpp @@ -0,0 +1,199 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// REQUIRES: cpu +#include + +#include + +using namespace sycl; + +enum class Variant { Function, Functor, FunctorAndProperty }; + +template +class ReqdWGSizePositiveA; +template +class ReqdWGSizeNoLocalPositive; +template +class ReqdWGSizeNegativeA; + +template range repeatRange(size_t Val); +template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } +template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } +template <> range<3> repeatRange<3>(size_t Val) { + return range<3>{Val, Val, Val}; +} + +std::string rangeToString(range<1> Range) { + return "{1, 1, " + std::to_string(Range[0]) + "}"; +} +std::string rangeToString(range<2> Range) { + return "{1, " + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + "}"; +} +std::string rangeToString(range<3> Range) { + return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + ", " + std::to_string(Range[2]) + "}"; +} + +template struct KernelFunctorWithMaxWGSizeProp { + void operator()(nd_item) const {} + void operator()(item) const {} + + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size}; + } +}; + +template +int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { + constexpr size_t Dims = sizeof...(Is); + + bool IsOpenCL = (Q.get_backend() == backend::opencl); + + // Positive test case: Specify local size that matches required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + // Kernel that has a required WG size, but no local size is specified. + // + // TODO: This fails on OpenCL and should be investigated. + if (!IsOpenCL) { + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for< + ReqdWGSizeNoLocalPositive>( + repeatRange(16), Props, KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + try { + Q.parallel_for>( + repeatRange(16), Props, KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " + "unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Negative test case: Specify local size that does not match required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(16), repeatRange(2)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(Is...))) == std::string::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(16), repeatRange(8)), Props, + KernelFunc); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " + "has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(Is...))) == std::string::npos) { + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + } + + return 0; +} + +template int test_max(queue &Q) { + auto Props = ext::oneapi::experimental::properties{ + ext::oneapi::experimental::max_work_group_size}; + auto KernelFunction = [](auto) {}; + + auto EmptyProps = ext::oneapi::experimental::properties{}; + KernelFunctorWithMaxWGSizeProp KernelFunctor; + + int Res = 0; + Res += test(Q, Props, KernelFunction); + Res += test(Q, EmptyProps, KernelFunctor); + Res += test(Q, Props, KernelFunctor); + return Res; +} + +int main() { + auto AsyncHandler = [](exception_list ES) { + for (auto &E : ES) { + std::rethrow_exception(E); + } + }; + + queue Q(AsyncHandler); + + int Res = 0; + Res += test_max<4>(Q); + Res += test_max<4, 4>(Q); + Res += test_max<8, 4>(Q); + Res += test_max<4, 8>(Q); + Res += test_max<4, 4, 4>(Q); + Res += test_max<4, 4, 8>(Q); + Res += test_max<8, 4, 4>(Q); + Res += test_max<4, 8, 4>(Q); + return Res; +} diff --git a/sycl/test-e2e/Basic/max_work_group_size_props_gpu.cpp b/sycl/test-e2e/Basic/max_work_group_size_props_gpu.cpp new file mode 100644 index 0000000000000..463d685a39fed --- /dev/null +++ b/sycl/test-e2e/Basic/max_work_group_size_props_gpu.cpp @@ -0,0 +1,199 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// REQUIRES: gpu +#include + +#include + +using namespace sycl; + +enum class Variant { Function, Functor, FunctorAndProperty }; + +template +class ReqdWGSizePositiveA; +template +class ReqdWGSizeNoLocalPositive; +template +class ReqdWGSizeNegativeA; + +template range repeatRange(size_t Val); +template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } +template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } +template <> range<3> repeatRange<3>(size_t Val) { + return range<3>{Val, Val, Val}; +} + +std::string rangeToString(range<1> Range) { + return "{1, 1, " + std::to_string(Range[0]) + "}"; +} +std::string rangeToString(range<2> Range) { + return "{1, " + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + "}"; +} +std::string rangeToString(range<3> Range) { + return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + ", " + std::to_string(Range[2]) + "}"; +} + +template struct KernelFunctorWithMaxWGSizeProp { + void operator()(nd_item) const {} + void operator()(item) const {} + + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size}; + } +}; + +template +int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { + constexpr size_t Dims = sizeof...(Is); + + bool IsOpenCL = (Q.get_backend() == backend::opencl); + + // Positive test case: Specify local size that matches required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + // Kernel that has a required WG size, but no local size is specified. + // + // TODO: This fails on OpenCL and should be investigated. + if (!IsOpenCL) { + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for< + ReqdWGSizeNoLocalPositive>( + repeatRange(16), Props, KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + try { + Q.parallel_for>( + repeatRange(16), Props, KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " + "unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Negative test case: Specify local size that does not match required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(16), repeatRange(2)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(Is...))) == std::string::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(16), repeatRange(8)), Props, + KernelFunc); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " + "has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(Is...))) == std::string::npos) { + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + } + + return 0; +} + +template int test_max(queue &Q) { + auto Props = ext::oneapi::experimental::properties{ + ext::oneapi::experimental::max_work_group_size}; + auto KernelFunction = [](auto) {}; + + auto EmptyProps = ext::oneapi::experimental::properties{}; + KernelFunctorWithMaxWGSizeProp KernelFunctor; + + int Res = 0; + Res += test(Q, Props, KernelFunction); + Res += test(Q, EmptyProps, KernelFunctor); + Res += test(Q, Props, KernelFunctor); + return Res; +} + +int main() { + auto AsyncHandler = [](exception_list ES) { + for (auto &E : ES) { + std::rethrow_exception(E); + } + }; + + queue Q(AsyncHandler); + + int Res = 0; + Res += test_max<4>(Q); + Res += test_max<4, 4>(Q); + Res += test_max<8, 4>(Q); + Res += test_max<4, 8>(Q); + Res += test_max<4, 4, 4>(Q); + Res += test_max<4, 4, 8>(Q); + Res += test_max<8, 4, 4>(Q); + Res += test_max<4, 8, 4>(Q); + return Res; +} diff --git a/sycl/test-e2e/Basic/max_work_group_size_props_lzo.cpp b/sycl/test-e2e/Basic/max_work_group_size_props_lzo.cpp new file mode 100644 index 0000000000000..d1cffcc9ff3cf --- /dev/null +++ b/sycl/test-e2e/Basic/max_work_group_size_props_lzo.cpp @@ -0,0 +1,199 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// REQUIRES: level_zero +#include + +#include + +using namespace sycl; + +enum class Variant { Function, Functor, FunctorAndProperty }; + +template +class ReqdWGSizePositiveA; +template +class ReqdWGSizeNoLocalPositive; +template +class ReqdWGSizeNegativeA; + +template range repeatRange(size_t Val); +template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } +template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } +template <> range<3> repeatRange<3>(size_t Val) { + return range<3>{Val, Val, Val}; +} + +std::string rangeToString(range<1> Range) { + return "{1, 1, " + std::to_string(Range[0]) + "}"; +} +std::string rangeToString(range<2> Range) { + return "{1, " + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + "}"; +} +std::string rangeToString(range<3> Range) { + return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + ", " + std::to_string(Range[2]) + "}"; +} + +template struct KernelFunctorWithMaxWGSizeProp { + void operator()(nd_item) const {} + void operator()(item) const {} + + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size}; + } +}; + +template +int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { + constexpr size_t Dims = sizeof...(Is); + + bool IsOpenCL = (Q.get_backend() == backend::opencl); + + // Positive test case: Specify local size that matches required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + // Kernel that has a required WG size, but no local size is specified. + // + // TODO: This fails on OpenCL and should be investigated. + if (!IsOpenCL) { + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for< + ReqdWGSizeNoLocalPositive>( + repeatRange(16), Props, KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + try { + Q.parallel_for>( + repeatRange(16), Props, KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " + "unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Negative test case: Specify local size that does not match required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(16), repeatRange(2)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(Is...))) == std::string::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(16), repeatRange(8)), Props, + KernelFunc); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " + "has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(Is...))) == std::string::npos) { + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + } + + return 0; +} + +template int test_max(queue &Q) { + auto Props = ext::oneapi::experimental::properties{ + ext::oneapi::experimental::max_work_group_size}; + auto KernelFunction = [](auto) {}; + + auto EmptyProps = ext::oneapi::experimental::properties{}; + KernelFunctorWithMaxWGSizeProp KernelFunctor; + + int Res = 0; + Res += test(Q, Props, KernelFunction); + Res += test(Q, EmptyProps, KernelFunctor); + Res += test(Q, Props, KernelFunctor); + return Res; +} + +int main() { + auto AsyncHandler = [](exception_list ES) { + for (auto &E : ES) { + std::rethrow_exception(E); + } + }; + + queue Q(AsyncHandler); + + int Res = 0; + Res += test_max<4>(Q); + Res += test_max<4, 4>(Q); + Res += test_max<8, 4>(Q); + Res += test_max<4, 8>(Q); + Res += test_max<4, 4, 4>(Q); + Res += test_max<4, 4, 8>(Q); + Res += test_max<8, 4, 4>(Q); + Res += test_max<4, 8, 4>(Q); + return Res; +} diff --git a/sycl/test-e2e/Basic/max_work_group_size_props_ocl.cpp b/sycl/test-e2e/Basic/max_work_group_size_props_ocl.cpp new file mode 100644 index 0000000000000..22e9e5fb9ba90 --- /dev/null +++ b/sycl/test-e2e/Basic/max_work_group_size_props_ocl.cpp @@ -0,0 +1,199 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +// REQUIRES: opencl +#include + +#include + +using namespace sycl; + +enum class Variant { Function, Functor, FunctorAndProperty }; + +template +class ReqdWGSizePositiveA; +template +class ReqdWGSizeNoLocalPositive; +template +class ReqdWGSizeNegativeA; + +template range repeatRange(size_t Val); +template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } +template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } +template <> range<3> repeatRange<3>(size_t Val) { + return range<3>{Val, Val, Val}; +} + +std::string rangeToString(range<1> Range) { + return "{1, 1, " + std::to_string(Range[0]) + "}"; +} +std::string rangeToString(range<2> Range) { + return "{1, " + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + "}"; +} +std::string rangeToString(range<3> Range) { + return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + + ", " + std::to_string(Range[2]) + "}"; +} + +template struct KernelFunctorWithMaxWGSizeProp { + void operator()(nd_item) const {} + void operator()(item) const {} + + auto get(sycl::ext::oneapi::experimental::properties_tag) { + return sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size}; + } +}; + +template +int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { + constexpr size_t Dims = sizeof...(Is); + + bool IsOpenCL = (Q.get_backend() == backend::opencl); + + // Positive test case: Specify local size that matches required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(8), range(Is...)), Props, + KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + // Kernel that has a required WG size, but no local size is specified. + // + // TODO: This fails on OpenCL and should be investigated. + if (!IsOpenCL) { + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for< + ReqdWGSizeNoLocalPositive>( + repeatRange(16), Props, KernelFunc); + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + try { + Q.parallel_for>( + repeatRange(16), Props, KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " + "unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Negative test case: Specify local size that does not match required size. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( + nd_range(repeatRange(16), repeatRange(2)), Props, + KernelFunc); + }); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(Is...))) == std::string::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } + + // Same as above but using the queue shortcuts. + try { + Q.parallel_for>( + nd_range(repeatRange(16), repeatRange(8)), Props, + KernelFunc); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " + "has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (exception &E) { + if (E.code() != errc::nd_range || + std::string(E.what()).find( + "The specified local size " + rangeToString(repeatRange(8)) + + " doesn't match the required " + + "work-group size specified in the program source " + + rangeToString(range(Is...))) == std::string::npos) { + std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + } + + return 0; +} + +template int test_max(queue &Q) { + auto Props = ext::oneapi::experimental::properties{ + ext::oneapi::experimental::max_work_group_size}; + auto KernelFunction = [](auto) {}; + + auto EmptyProps = ext::oneapi::experimental::properties{}; + KernelFunctorWithMaxWGSizeProp KernelFunctor; + + int Res = 0; + Res += test(Q, Props, KernelFunction); + Res += test(Q, EmptyProps, KernelFunctor); + Res += test(Q, Props, KernelFunctor); + return Res; +} + +int main() { + auto AsyncHandler = [](exception_list ES) { + for (auto &E : ES) { + std::rethrow_exception(E); + } + }; + + queue Q(AsyncHandler); + + int Res = 0; + Res += test_max<4>(Q); + Res += test_max<4, 4>(Q); + Res += test_max<8, 4>(Q); + Res += test_max<4, 8>(Q); + Res += test_max<4, 4, 4>(Q); + Res += test_max<4, 4, 8>(Q); + Res += test_max<8, 4, 4>(Q); + Res += test_max<4, 8, 4>(Q); + return Res; +} From ada9cb8ed1259672ddb99c2efc0027992e2dad64 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 14 Aug 2024 11:39:34 +0100 Subject: [PATCH 09/17] fix formatting --- llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp | 3 +-- sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index 15c0fb78ccaaa..40af89ddcf26f 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -376,8 +376,7 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { Attr.getValueAsString().split(AttrValStrs, ','); size_t NumDims = AttrValStrs.size(); - assert(NumDims <= 3 && - "Incorrect number of values for kernel property"); + assert(NumDims <= 3 && "Incorrect number of values for kernel property"); // SYCL work-group sizes must be reversed for SPIR-V. std::reverse(AttrValStrs.begin(), AttrValStrs.end()); diff --git a/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp b/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp index dac8dc3f1b4a0..34ff3d1e4130f 100644 --- a/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp +++ b/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp @@ -85,8 +85,7 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { if (!IsOpenCL) { try { Q.submit([&](handler &CGH) { - CGH.parallel_for< - ReqdWGSizeNoLocalPositive>( + CGH.parallel_for>( repeatRange(16), Props, KernelFunc); }); Q.wait_and_throw(); @@ -185,5 +184,5 @@ int main() { queue Q(AsyncHandler); - return test_max<4>(Q); + return test_max<4>(Q); } From e4e927251f7d160bc3e2c4e440b0e4b205eab876 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 20 Aug 2024 10:28:00 +0100 Subject: [PATCH 10/17] Revert "test different backends" This reverts commit 7a084884e0e93448f69a22036b581e6aa6221f0e. --- .../Basic/max_work_group_size_props_acc.cpp | 199 ------------------ .../Basic/max_work_group_size_props_cpu.cpp | 199 ------------------ .../Basic/max_work_group_size_props_gpu.cpp | 199 ------------------ .../Basic/max_work_group_size_props_lzo.cpp | 199 ------------------ .../Basic/max_work_group_size_props_ocl.cpp | 199 ------------------ 5 files changed, 995 deletions(-) delete mode 100644 sycl/test-e2e/Basic/max_work_group_size_props_acc.cpp delete mode 100644 sycl/test-e2e/Basic/max_work_group_size_props_cpu.cpp delete mode 100644 sycl/test-e2e/Basic/max_work_group_size_props_gpu.cpp delete mode 100644 sycl/test-e2e/Basic/max_work_group_size_props_lzo.cpp delete mode 100644 sycl/test-e2e/Basic/max_work_group_size_props_ocl.cpp diff --git a/sycl/test-e2e/Basic/max_work_group_size_props_acc.cpp b/sycl/test-e2e/Basic/max_work_group_size_props_acc.cpp deleted file mode 100644 index a557444a4d593..0000000000000 --- a/sycl/test-e2e/Basic/max_work_group_size_props_acc.cpp +++ /dev/null @@ -1,199 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -// REQUIRES: accelerator -#include - -#include - -using namespace sycl; - -enum class Variant { Function, Functor, FunctorAndProperty }; - -template -class ReqdWGSizePositiveA; -template -class ReqdWGSizeNoLocalPositive; -template -class ReqdWGSizeNegativeA; - -template range repeatRange(size_t Val); -template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } -template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } -template <> range<3> repeatRange<3>(size_t Val) { - return range<3>{Val, Val, Val}; -} - -std::string rangeToString(range<1> Range) { - return "{1, 1, " + std::to_string(Range[0]) + "}"; -} -std::string rangeToString(range<2> Range) { - return "{1, " + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + - "}"; -} -std::string rangeToString(range<3> Range) { - return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + - ", " + std::to_string(Range[2]) + "}"; -} - -template struct KernelFunctorWithMaxWGSizeProp { - void operator()(nd_item) const {} - void operator()(item) const {} - - auto get(sycl::ext::oneapi::experimental::properties_tag) { - return sycl::ext::oneapi::experimental::properties{ - sycl::ext::oneapi::experimental::max_work_group_size}; - } -}; - -template -int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { - constexpr size_t Dims = sizeof...(Is); - - bool IsOpenCL = (Q.get_backend() == backend::opencl); - - // Positive test case: Specify local size that matches required size. - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); - }); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - - // Same as above but using the queue shortcuts. - try { - Q.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - - // Kernel that has a required WG size, but no local size is specified. - // - // TODO: This fails on OpenCL and should be investigated. - if (!IsOpenCL) { - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for< - ReqdWGSizeNoLocalPositive>( - repeatRange(16), Props, KernelFunc); - }); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - - try { - Q.parallel_for>( - repeatRange(16), Props, KernelFunc); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " - "unexpected exception: " - << E.what() << std::endl; - return 1; - } - } - - // Negative test case: Specify local size that does not match required size. - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for>( - nd_range(repeatRange(16), repeatRange(2)), Props, - KernelFunc); - }); - Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (exception &E) { - if (E.code() != errc::nd_range || - std::string(E.what()).find( - "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + - rangeToString(range(Is...))) == std::string::npos) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } - - // Same as above but using the queue shortcuts. - try { - Q.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, - KernelFunc); - Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " - "has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (exception &E) { - if (E.code() != errc::nd_range || - std::string(E.what()).find( - "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + - rangeToString(range(Is...))) == std::string::npos) { - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - } - - return 0; -} - -template int test_max(queue &Q) { - auto Props = ext::oneapi::experimental::properties{ - ext::oneapi::experimental::max_work_group_size}; - auto KernelFunction = [](auto) {}; - - auto EmptyProps = ext::oneapi::experimental::properties{}; - KernelFunctorWithMaxWGSizeProp KernelFunctor; - - int Res = 0; - Res += test(Q, Props, KernelFunction); - Res += test(Q, EmptyProps, KernelFunctor); - Res += test(Q, Props, KernelFunctor); - return Res; -} - -int main() { - auto AsyncHandler = [](exception_list ES) { - for (auto &E : ES) { - std::rethrow_exception(E); - } - }; - - queue Q(AsyncHandler); - - int Res = 0; - Res += test_max<4>(Q); - Res += test_max<4, 4>(Q); - Res += test_max<8, 4>(Q); - Res += test_max<4, 8>(Q); - Res += test_max<4, 4, 4>(Q); - Res += test_max<4, 4, 8>(Q); - Res += test_max<8, 4, 4>(Q); - Res += test_max<4, 8, 4>(Q); - return Res; -} diff --git a/sycl/test-e2e/Basic/max_work_group_size_props_cpu.cpp b/sycl/test-e2e/Basic/max_work_group_size_props_cpu.cpp deleted file mode 100644 index f75f2fdd0b315..0000000000000 --- a/sycl/test-e2e/Basic/max_work_group_size_props_cpu.cpp +++ /dev/null @@ -1,199 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -// REQUIRES: cpu -#include - -#include - -using namespace sycl; - -enum class Variant { Function, Functor, FunctorAndProperty }; - -template -class ReqdWGSizePositiveA; -template -class ReqdWGSizeNoLocalPositive; -template -class ReqdWGSizeNegativeA; - -template range repeatRange(size_t Val); -template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } -template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } -template <> range<3> repeatRange<3>(size_t Val) { - return range<3>{Val, Val, Val}; -} - -std::string rangeToString(range<1> Range) { - return "{1, 1, " + std::to_string(Range[0]) + "}"; -} -std::string rangeToString(range<2> Range) { - return "{1, " + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + - "}"; -} -std::string rangeToString(range<3> Range) { - return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + - ", " + std::to_string(Range[2]) + "}"; -} - -template struct KernelFunctorWithMaxWGSizeProp { - void operator()(nd_item) const {} - void operator()(item) const {} - - auto get(sycl::ext::oneapi::experimental::properties_tag) { - return sycl::ext::oneapi::experimental::properties{ - sycl::ext::oneapi::experimental::max_work_group_size}; - } -}; - -template -int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { - constexpr size_t Dims = sizeof...(Is); - - bool IsOpenCL = (Q.get_backend() == backend::opencl); - - // Positive test case: Specify local size that matches required size. - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); - }); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - - // Same as above but using the queue shortcuts. - try { - Q.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - - // Kernel that has a required WG size, but no local size is specified. - // - // TODO: This fails on OpenCL and should be investigated. - if (!IsOpenCL) { - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for< - ReqdWGSizeNoLocalPositive>( - repeatRange(16), Props, KernelFunc); - }); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - - try { - Q.parallel_for>( - repeatRange(16), Props, KernelFunc); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " - "unexpected exception: " - << E.what() << std::endl; - return 1; - } - } - - // Negative test case: Specify local size that does not match required size. - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for>( - nd_range(repeatRange(16), repeatRange(2)), Props, - KernelFunc); - }); - Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (exception &E) { - if (E.code() != errc::nd_range || - std::string(E.what()).find( - "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + - rangeToString(range(Is...))) == std::string::npos) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } - - // Same as above but using the queue shortcuts. - try { - Q.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, - KernelFunc); - Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " - "has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (exception &E) { - if (E.code() != errc::nd_range || - std::string(E.what()).find( - "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + - rangeToString(range(Is...))) == std::string::npos) { - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - } - - return 0; -} - -template int test_max(queue &Q) { - auto Props = ext::oneapi::experimental::properties{ - ext::oneapi::experimental::max_work_group_size}; - auto KernelFunction = [](auto) {}; - - auto EmptyProps = ext::oneapi::experimental::properties{}; - KernelFunctorWithMaxWGSizeProp KernelFunctor; - - int Res = 0; - Res += test(Q, Props, KernelFunction); - Res += test(Q, EmptyProps, KernelFunctor); - Res += test(Q, Props, KernelFunctor); - return Res; -} - -int main() { - auto AsyncHandler = [](exception_list ES) { - for (auto &E : ES) { - std::rethrow_exception(E); - } - }; - - queue Q(AsyncHandler); - - int Res = 0; - Res += test_max<4>(Q); - Res += test_max<4, 4>(Q); - Res += test_max<8, 4>(Q); - Res += test_max<4, 8>(Q); - Res += test_max<4, 4, 4>(Q); - Res += test_max<4, 4, 8>(Q); - Res += test_max<8, 4, 4>(Q); - Res += test_max<4, 8, 4>(Q); - return Res; -} diff --git a/sycl/test-e2e/Basic/max_work_group_size_props_gpu.cpp b/sycl/test-e2e/Basic/max_work_group_size_props_gpu.cpp deleted file mode 100644 index 463d685a39fed..0000000000000 --- a/sycl/test-e2e/Basic/max_work_group_size_props_gpu.cpp +++ /dev/null @@ -1,199 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -// REQUIRES: gpu -#include - -#include - -using namespace sycl; - -enum class Variant { Function, Functor, FunctorAndProperty }; - -template -class ReqdWGSizePositiveA; -template -class ReqdWGSizeNoLocalPositive; -template -class ReqdWGSizeNegativeA; - -template range repeatRange(size_t Val); -template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } -template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } -template <> range<3> repeatRange<3>(size_t Val) { - return range<3>{Val, Val, Val}; -} - -std::string rangeToString(range<1> Range) { - return "{1, 1, " + std::to_string(Range[0]) + "}"; -} -std::string rangeToString(range<2> Range) { - return "{1, " + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + - "}"; -} -std::string rangeToString(range<3> Range) { - return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + - ", " + std::to_string(Range[2]) + "}"; -} - -template struct KernelFunctorWithMaxWGSizeProp { - void operator()(nd_item) const {} - void operator()(item) const {} - - auto get(sycl::ext::oneapi::experimental::properties_tag) { - return sycl::ext::oneapi::experimental::properties{ - sycl::ext::oneapi::experimental::max_work_group_size}; - } -}; - -template -int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { - constexpr size_t Dims = sizeof...(Is); - - bool IsOpenCL = (Q.get_backend() == backend::opencl); - - // Positive test case: Specify local size that matches required size. - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); - }); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - - // Same as above but using the queue shortcuts. - try { - Q.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - - // Kernel that has a required WG size, but no local size is specified. - // - // TODO: This fails on OpenCL and should be investigated. - if (!IsOpenCL) { - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for< - ReqdWGSizeNoLocalPositive>( - repeatRange(16), Props, KernelFunc); - }); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - - try { - Q.parallel_for>( - repeatRange(16), Props, KernelFunc); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " - "unexpected exception: " - << E.what() << std::endl; - return 1; - } - } - - // Negative test case: Specify local size that does not match required size. - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for>( - nd_range(repeatRange(16), repeatRange(2)), Props, - KernelFunc); - }); - Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (exception &E) { - if (E.code() != errc::nd_range || - std::string(E.what()).find( - "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + - rangeToString(range(Is...))) == std::string::npos) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } - - // Same as above but using the queue shortcuts. - try { - Q.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, - KernelFunc); - Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " - "has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (exception &E) { - if (E.code() != errc::nd_range || - std::string(E.what()).find( - "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + - rangeToString(range(Is...))) == std::string::npos) { - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - } - - return 0; -} - -template int test_max(queue &Q) { - auto Props = ext::oneapi::experimental::properties{ - ext::oneapi::experimental::max_work_group_size}; - auto KernelFunction = [](auto) {}; - - auto EmptyProps = ext::oneapi::experimental::properties{}; - KernelFunctorWithMaxWGSizeProp KernelFunctor; - - int Res = 0; - Res += test(Q, Props, KernelFunction); - Res += test(Q, EmptyProps, KernelFunctor); - Res += test(Q, Props, KernelFunctor); - return Res; -} - -int main() { - auto AsyncHandler = [](exception_list ES) { - for (auto &E : ES) { - std::rethrow_exception(E); - } - }; - - queue Q(AsyncHandler); - - int Res = 0; - Res += test_max<4>(Q); - Res += test_max<4, 4>(Q); - Res += test_max<8, 4>(Q); - Res += test_max<4, 8>(Q); - Res += test_max<4, 4, 4>(Q); - Res += test_max<4, 4, 8>(Q); - Res += test_max<8, 4, 4>(Q); - Res += test_max<4, 8, 4>(Q); - return Res; -} diff --git a/sycl/test-e2e/Basic/max_work_group_size_props_lzo.cpp b/sycl/test-e2e/Basic/max_work_group_size_props_lzo.cpp deleted file mode 100644 index d1cffcc9ff3cf..0000000000000 --- a/sycl/test-e2e/Basic/max_work_group_size_props_lzo.cpp +++ /dev/null @@ -1,199 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -// REQUIRES: level_zero -#include - -#include - -using namespace sycl; - -enum class Variant { Function, Functor, FunctorAndProperty }; - -template -class ReqdWGSizePositiveA; -template -class ReqdWGSizeNoLocalPositive; -template -class ReqdWGSizeNegativeA; - -template range repeatRange(size_t Val); -template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } -template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } -template <> range<3> repeatRange<3>(size_t Val) { - return range<3>{Val, Val, Val}; -} - -std::string rangeToString(range<1> Range) { - return "{1, 1, " + std::to_string(Range[0]) + "}"; -} -std::string rangeToString(range<2> Range) { - return "{1, " + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + - "}"; -} -std::string rangeToString(range<3> Range) { - return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + - ", " + std::to_string(Range[2]) + "}"; -} - -template struct KernelFunctorWithMaxWGSizeProp { - void operator()(nd_item) const {} - void operator()(item) const {} - - auto get(sycl::ext::oneapi::experimental::properties_tag) { - return sycl::ext::oneapi::experimental::properties{ - sycl::ext::oneapi::experimental::max_work_group_size}; - } -}; - -template -int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { - constexpr size_t Dims = sizeof...(Is); - - bool IsOpenCL = (Q.get_backend() == backend::opencl); - - // Positive test case: Specify local size that matches required size. - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); - }); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - - // Same as above but using the queue shortcuts. - try { - Q.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - - // Kernel that has a required WG size, but no local size is specified. - // - // TODO: This fails on OpenCL and should be investigated. - if (!IsOpenCL) { - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for< - ReqdWGSizeNoLocalPositive>( - repeatRange(16), Props, KernelFunc); - }); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - - try { - Q.parallel_for>( - repeatRange(16), Props, KernelFunc); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " - "unexpected exception: " - << E.what() << std::endl; - return 1; - } - } - - // Negative test case: Specify local size that does not match required size. - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for>( - nd_range(repeatRange(16), repeatRange(2)), Props, - KernelFunc); - }); - Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (exception &E) { - if (E.code() != errc::nd_range || - std::string(E.what()).find( - "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + - rangeToString(range(Is...))) == std::string::npos) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } - - // Same as above but using the queue shortcuts. - try { - Q.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, - KernelFunc); - Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " - "has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (exception &E) { - if (E.code() != errc::nd_range || - std::string(E.what()).find( - "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + - rangeToString(range(Is...))) == std::string::npos) { - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - } - - return 0; -} - -template int test_max(queue &Q) { - auto Props = ext::oneapi::experimental::properties{ - ext::oneapi::experimental::max_work_group_size}; - auto KernelFunction = [](auto) {}; - - auto EmptyProps = ext::oneapi::experimental::properties{}; - KernelFunctorWithMaxWGSizeProp KernelFunctor; - - int Res = 0; - Res += test(Q, Props, KernelFunction); - Res += test(Q, EmptyProps, KernelFunctor); - Res += test(Q, Props, KernelFunctor); - return Res; -} - -int main() { - auto AsyncHandler = [](exception_list ES) { - for (auto &E : ES) { - std::rethrow_exception(E); - } - }; - - queue Q(AsyncHandler); - - int Res = 0; - Res += test_max<4>(Q); - Res += test_max<4, 4>(Q); - Res += test_max<8, 4>(Q); - Res += test_max<4, 8>(Q); - Res += test_max<4, 4, 4>(Q); - Res += test_max<4, 4, 8>(Q); - Res += test_max<8, 4, 4>(Q); - Res += test_max<4, 8, 4>(Q); - return Res; -} diff --git a/sycl/test-e2e/Basic/max_work_group_size_props_ocl.cpp b/sycl/test-e2e/Basic/max_work_group_size_props_ocl.cpp deleted file mode 100644 index 22e9e5fb9ba90..0000000000000 --- a/sycl/test-e2e/Basic/max_work_group_size_props_ocl.cpp +++ /dev/null @@ -1,199 +0,0 @@ -// RUN: %{build} -o %t.out -// RUN: %{run} %t.out - -// REQUIRES: opencl -#include - -#include - -using namespace sycl; - -enum class Variant { Function, Functor, FunctorAndProperty }; - -template -class ReqdWGSizePositiveA; -template -class ReqdWGSizeNoLocalPositive; -template -class ReqdWGSizeNegativeA; - -template range repeatRange(size_t Val); -template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } -template <> range<2> repeatRange<2>(size_t Val) { return range<2>{Val, Val}; } -template <> range<3> repeatRange<3>(size_t Val) { - return range<3>{Val, Val, Val}; -} - -std::string rangeToString(range<1> Range) { - return "{1, 1, " + std::to_string(Range[0]) + "}"; -} -std::string rangeToString(range<2> Range) { - return "{1, " + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + - "}"; -} -std::string rangeToString(range<3> Range) { - return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + - ", " + std::to_string(Range[2]) + "}"; -} - -template struct KernelFunctorWithMaxWGSizeProp { - void operator()(nd_item) const {} - void operator()(item) const {} - - auto get(sycl::ext::oneapi::experimental::properties_tag) { - return sycl::ext::oneapi::experimental::properties{ - sycl::ext::oneapi::experimental::max_work_group_size}; - } -}; - -template -int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { - constexpr size_t Dims = sizeof...(Is); - - bool IsOpenCL = (Q.get_backend() == backend::opencl); - - // Positive test case: Specify local size that matches required size. - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); - }); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - - // Same as above but using the queue shortcuts. - try { - Q.parallel_for>( - nd_range(repeatRange(8), range(Is...)), Props, - KernelFunc); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - - // Kernel that has a required WG size, but no local size is specified. - // - // TODO: This fails on OpenCL and should be investigated. - if (!IsOpenCL) { - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for< - ReqdWGSizeNoLocalPositive>( - repeatRange(16), Props, KernelFunc); - }); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - - try { - Q.parallel_for>( - repeatRange(16), Props, KernelFunc); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " - "unexpected exception: " - << E.what() << std::endl; - return 1; - } - } - - // Negative test case: Specify local size that does not match required size. - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for>( - nd_range(repeatRange(16), repeatRange(2)), Props, - KernelFunc); - }); - Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (exception &E) { - if (E.code() != errc::nd_range || - std::string(E.what()).find( - "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + - rangeToString(range(Is...))) == std::string::npos) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } - - // Same as above but using the queue shortcuts. - try { - Q.parallel_for>( - nd_range(repeatRange(16), repeatRange(8)), Props, - KernelFunc); - Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " - "has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (exception &E) { - if (E.code() != errc::nd_range || - std::string(E.what()).find( - "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + - rangeToString(range(Is...))) == std::string::npos) { - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - } - - return 0; -} - -template int test_max(queue &Q) { - auto Props = ext::oneapi::experimental::properties{ - ext::oneapi::experimental::max_work_group_size}; - auto KernelFunction = [](auto) {}; - - auto EmptyProps = ext::oneapi::experimental::properties{}; - KernelFunctorWithMaxWGSizeProp KernelFunctor; - - int Res = 0; - Res += test(Q, Props, KernelFunction); - Res += test(Q, EmptyProps, KernelFunctor); - Res += test(Q, Props, KernelFunctor); - return Res; -} - -int main() { - auto AsyncHandler = [](exception_list ES) { - for (auto &E : ES) { - std::rethrow_exception(E); - } - }; - - queue Q(AsyncHandler); - - int Res = 0; - Res += test_max<4>(Q); - Res += test_max<4, 4>(Q); - Res += test_max<8, 4>(Q); - Res += test_max<4, 8>(Q); - Res += test_max<4, 4, 4>(Q); - Res += test_max<4, 4, 8>(Q); - Res += test_max<8, 4, 4>(Q); - Res += test_max<4, 8, 4>(Q); - return Res; -} From ea9c29393840ccb405a41b97521a0c56c1122058 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 20 Aug 2024 17:35:42 +0100 Subject: [PATCH 11/17] update tests --- .../max_linear_work_group_size_props.cpp | 124 ++++++++++-------- .../Basic/max_work_group_size_props.cpp | 90 ++++++------- 2 files changed, 111 insertions(+), 103 deletions(-) diff --git a/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp b/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp index 34ff3d1e4130f..cd5dca7db7084 100644 --- a/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp +++ b/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp @@ -1,6 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// This property is not yet supported by all UR adapters +// XFAIL: level_zero, opencl + #include #include @@ -10,11 +13,11 @@ using namespace sycl; enum class Variant { Function, Functor, FunctorAndProperty }; template -class ReqdWGSizePositiveA; +class MaxLinearWGSizePositive; template -class ReqdWGSizeNoLocalPositive; +class MaxLinearWGSizeNoLocalPositive; template -class ReqdWGSizeNegativeA; +class MaxLinearWGSizeNegative; template range repeatRange(size_t Val); template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } @@ -34,6 +37,15 @@ std::string rangeToString(range<3> Range) { return "{" + std::to_string(Range[0]) + ", " + std::to_string(Range[1]) + ", " + std::to_string(Range[2]) + "}"; } +std::string linearRangeToString(range<1> Range) { + return std::to_string(Range[0]); +} +std::string linearRangeToString(range<2> Range) { + return std::to_string(Range[0] * Range[1]); +} +std::string linearRangeToString(range<3> Range) { + return std::to_string(Range[0] * Range[1] * Range[2]); +} template struct KernelFunctorWithMaxWGSizeProp { void operator()(nd_item<1>) const {} @@ -50,84 +62,82 @@ template >( + CGH.parallel_for>( nd_range(repeatRange(8), range(I)), Props, KernelFunc); }); Q.wait_and_throw(); } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; + std::cerr + << "Test case MaxLinearWGSizePositive failed: unexpected exception: " + << E.what() << std::endl; return 1; } // Same as above but using the queue shortcuts. try { - Q.parallel_for>( + Q.parallel_for>( nd_range(repeatRange(8), range(I)), Props, KernelFunc); Q.wait_and_throw(); } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " - "exception: " - << E.what() << std::endl; + std::cerr + << "Test case MaxLinearWGSizePositive shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; return 1; } // Kernel that has a required WG size, but no local size is specified. // - // TODO: This fails on OpenCL and should be investigated. - if (!IsOpenCL) { - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for>( - repeatRange(16), Props, KernelFunc); - }); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - - try { - Q.parallel_for>( + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( repeatRange(16), Props, KernelFunc); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " - "unexpected exception: " - << E.what() << std::endl; - return 1; - } + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case MaxLinearWGSizeNoLocalPositive failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + try { + Q.parallel_for>( + repeatRange(16), Props, KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case MaxLinearWGSizeNoLocalPositive shortcut failed: " + "unexpected exception: " + << E.what() << std::endl; + return 1; } // Negative test case: Specify local size that does not match required size. try { Q.submit([&](handler &CGH) { - CGH.parallel_for>( - nd_range(repeatRange(16), repeatRange(2)), Props, + CGH.parallel_for>( + nd_range(repeatRange(16), repeatRange(8)), Props, KernelFunc); }); Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " - "thrown\n"; + std::cerr + << "Test case MaxLinearWGSizeNegative failed: no exception has been " + "thrown\n"; return 1; // We shouldn't be here, exception is expected } catch (exception &E) { if (E.code() != errc::nd_range || std::string(E.what()).find( - "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + - rangeToString(range(I))) == std::string::npos) { + "The total number of work-items in the work-group (" + + linearRangeToString(repeatRange(8)) + + ") exceeds the maximum specified in the program source (" + + linearRangeToString(range(I)) + ")") == std::string::npos) { std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << "Test case MaxLinearWGSizeNegative failed: unexpected exception: " << E.what() << std::endl; return 1; } @@ -135,24 +145,26 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { // Same as above but using the queue shortcuts. try { - Q.parallel_for>( + Q.parallel_for>( nd_range(repeatRange(16), repeatRange(8)), Props, KernelFunc); Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " - "has been " - "thrown\n"; + std::cerr + << "Test case MaxLinearWGSizeNegative shortcut failed: no exception " + "has been " + "thrown\n"; return 1; // We shouldn't be here, exception is expected } catch (exception &E) { if (E.code() != errc::nd_range || std::string(E.what()).find( - "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + - rangeToString(range(I))) == std::string::npos) { - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " - "exception: " - << E.what() << std::endl; + "The total number of work-items in the work-group (" + + linearRangeToString(repeatRange(8)) + + ") exceeds the maximum specified in the program source (" + + linearRangeToString(range(I)) + ")") == std::string::npos) { + std::cerr + << "Test case MaxLinearWGSizeNegative shortcut failed: unexpected " + "exception: " + << E.what() << std::endl; return 1; } } diff --git a/sycl/test-e2e/Basic/max_work_group_size_props.cpp b/sycl/test-e2e/Basic/max_work_group_size_props.cpp index bfc8fef1df916..b159091176ca7 100644 --- a/sycl/test-e2e/Basic/max_work_group_size_props.cpp +++ b/sycl/test-e2e/Basic/max_work_group_size_props.cpp @@ -1,6 +1,9 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out +// This property is not yet supported by all UR adapters +// XFAIL: level_zero, opencl + #include #include @@ -10,11 +13,11 @@ using namespace sycl; enum class Variant { Function, Functor, FunctorAndProperty }; template -class ReqdWGSizePositiveA; +class MaxWGSizePositive; template -class ReqdWGSizeNoLocalPositive; +class MaxWGSizeNoLocalPositive; template -class ReqdWGSizeNegativeA; +class MaxWGSizeNegative; template range repeatRange(size_t Val); template <> range<1> repeatRange<1>(size_t Val) { return range<1>{Val}; } @@ -50,30 +53,28 @@ template >( + CGH.parallel_for>( nd_range(repeatRange(8), range(Is...)), Props, KernelFunc); }); Q.wait_and_throw(); } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + std::cerr << "Test case MaxWGSizePositive failed: unexpected exception: " << E.what() << std::endl; return 1; } // Same as above but using the queue shortcuts. try { - Q.parallel_for>( + Q.parallel_for>( nd_range(repeatRange(8), range(Is...)), Props, KernelFunc); Q.wait_and_throw(); } catch (exception &E) { - std::cerr << "Test case ReqdWGSizePositiveA shortcut failed: unexpected " + std::cerr << "Test case MaxWGSizePositive shortcut failed: unexpected " "exception: " << E.what() << std::endl; return 1; @@ -81,66 +82,61 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { // Kernel that has a required WG size, but no local size is specified. // - // TODO: This fails on OpenCL and should be investigated. - if (!IsOpenCL) { - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for< - ReqdWGSizeNoLocalPositive>( - repeatRange(16), Props, KernelFunc); - }); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive failed: unexpected " - "exception: " - << E.what() << std::endl; - return 1; - } - - try { - Q.parallel_for>( + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for>( repeatRange(16), Props, KernelFunc); - Q.wait_and_throw(); - } catch (exception &E) { - std::cerr << "Test case ReqdWGSizeNoLocalPositive shortcut failed: " - "unexpected exception: " - << E.what() << std::endl; - return 1; - } + }); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case MaxWGSizeNoLocalPositive failed: unexpected " + "exception: " + << E.what() << std::endl; + return 1; + } + + try { + Q.parallel_for>( + repeatRange(16), Props, KernelFunc); + Q.wait_and_throw(); + } catch (exception &E) { + std::cerr << "Test case MaxWGSizeNoLocalPositive shortcut failed: " + "unexpected exception: " + << E.what() << std::endl; + return 1; } // Negative test case: Specify local size that does not match required size. try { Q.submit([&](handler &CGH) { - CGH.parallel_for>( - nd_range(repeatRange(16), repeatRange(2)), Props, + CGH.parallel_for>( + nd_range(repeatRange(16), repeatRange(8)), Props, KernelFunc); }); Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " + std::cerr << "Test case MaxWGSizeNegative failed: no exception has been " "thrown\n"; return 1; // We shouldn't be here, exception is expected } catch (exception &E) { if (E.code() != errc::nd_range || std::string(E.what()).find( "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + + " exceeds the maximum work-group size specified in the program " + "source " + rangeToString(range(Is...))) == std::string::npos) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " - << E.what() << std::endl; + std::cerr << "Test case MaxWGSizeNegative failed: unexpected exception: " + << E.what() << std::endl; return 1; } } // Same as above but using the queue shortcuts. try { - Q.parallel_for>( + Q.parallel_for>( nd_range(repeatRange(16), repeatRange(8)), Props, KernelFunc); Q.wait_and_throw(); - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: no exception " + std::cerr << "Test case MaxWGSizeNegative shortcut failed: no exception " "has been " "thrown\n"; return 1; // We shouldn't be here, exception is expected @@ -148,10 +144,10 @@ int test(queue &Q, PropertiesT Props, KernelType KernelFunc) { if (E.code() != errc::nd_range || std::string(E.what()).find( "The specified local size " + rangeToString(repeatRange(8)) + - " doesn't match the required " + - "work-group size specified in the program source " + + " exceeds the maximum work-group size specified in the program " + "source " + rangeToString(range(Is...))) == std::string::npos) { - std::cerr << "Test case ReqdWGSizeNegativeA shortcut failed: unexpected " + std::cerr << "Test case MaxWGSizeNegative shortcut failed: unexpected " "exception: " << E.what() << std::endl; return 1; From 5a2f3a64e8ee379113f8c622f3b648d7e7403d45 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 20 Aug 2024 17:54:09 +0100 Subject: [PATCH 12/17] add sycl runtime checking --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 4 +- .../detail/error_handling/error_handling.cpp | 44 +++++++++++++++++-- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index c4b159216f223..4329e6c53b9f9 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -146,8 +146,8 @@ if(SYCL_UR_USE_FETCH_CONTENT) ) fetch_adapter_source(cuda - ${UNIFIED_RUNTIME_REPO} - ${UNIFIED_RUNTIME_TAG} + https://github.com/frasercrmck/unified-runtime.git + 52918b62251fa9b61b959a6af705a57b36eb86aa ) fetch_adapter_source(hip diff --git a/sycl/source/detail/error_handling/error_handling.cpp b/sycl/source/detail/error_handling/error_handling.cpp index 3780b526b5fc3..b829e40eb0658 100644 --- a/sycl/source/detail/error_handling/error_handling.cpp +++ b/sycl/source/detail/error_handling/error_handling.cpp @@ -103,6 +103,16 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, UR_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, sizeof(size_t) * 3, CompileWGSize, nullptr); + size_t CompileMaxWGSize[3] = {0}; + Plugin->call(urKernelGetGroupInfo, Kernel, Device, + UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE, + sizeof(size_t) * 3, CompileMaxWGSize, nullptr); + + size_t CompileMaxLinearWGSize = 0; + Plugin->call(urKernelGetGroupInfo, Kernel, Device, + UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE, + sizeof(size_t), &CompileMaxLinearWGSize, nullptr); + size_t MaxWGSize = 0; Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE, sizeof(size_t), &MaxWGSize, nullptr); @@ -145,7 +155,28 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, std::to_string(CompileWGSize[0]) + "}"); } + const size_t TotalNumberOfWIs = + NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; + if (HasLocalSize) { + if (CompileMaxWGSize[0] != 0) { + if (NDRDesc.LocalSize[0] > CompileMaxWGSize[0] || + NDRDesc.LocalSize[1] > CompileMaxWGSize[1] || + NDRDesc.LocalSize[2] > CompileMaxWGSize[2]) { + throw sycl::exception( + make_error_code(errc::nd_range), + "The specified local size {" + + std::to_string(NDRDesc.LocalSize[2]) + ", " + + std::to_string(NDRDesc.LocalSize[1]) + ", " + + std::to_string(NDRDesc.LocalSize[0]) + + "} exceeds the maximum work-group size specified " + "in the program source {" + + std::to_string(CompileMaxWGSize[2]) + ", " + + std::to_string(CompileMaxWGSize[1]) + ", " + + std::to_string(CompileMaxWGSize[0]) + "}"); + } + } + size_t MaxThreadsPerBlock[3] = {}; Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(MaxThreadsPerBlock), MaxThreadsPerBlock, nullptr); @@ -161,6 +192,15 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, "} for this device"); } } + + if (CompileMaxLinearWGSize && TotalNumberOfWIs > CompileMaxLinearWGSize) { + throw sycl::exception( + make_error_code(errc::nd_range), + "The total number of work-items in the work-group (" + + std::to_string(TotalNumberOfWIs) + + ") exceeds the maximum specified in the program source (" + + std::to_string(CompileMaxLinearWGSize) + ")"); + } } if (IsOpenCLV1x) { @@ -170,8 +210,6 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, // local_work_size[0] * ... * local_work_size[work_dim - 1] is greater // than the value specified by UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE in // table 4.3 - const size_t TotalNumberOfWIs = - NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; if (TotalNumberOfWIs > MaxWGSize) throw sycl::exception( make_error_code(errc::nd_range), @@ -188,8 +226,6 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, Plugin->call(urKernelGetGroupInfo, Kernel, Device, UR_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof(size_t), &KernelWGSize, nullptr); - const size_t TotalNumberOfWIs = - NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; if (TotalNumberOfWIs > KernelWGSize) throw sycl::exception( make_error_code(errc::nd_range), From 63a776b37309183ade554bab1c0783b2ccc2aed7 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 20 Aug 2024 18:07:06 +0100 Subject: [PATCH 13/17] fix ur link --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 4329e6c53b9f9..7b8fb1e97c983 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,14 +116,15 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/frasercrmck/unified-runtime.git") + # commit cabf128094eff9ff7b79bdff559640a8a111f0c3 # Merge: a96fcbc5 15bca3b6 # Author: Omar Ahmed # Date: Mon Aug 19 16:20:45 2024 +0100 # Merge pull request #1984 from rafbiels/rafbiels/cuda-stream-race-cond # Fix race condition in CUDA stream creation - set(UNIFIED_RUNTIME_TAG cabf128094eff9ff7b79bdff559640a8a111f0c3) + set(UNIFIED_RUNTIME_TAG 52918b62251fa9b61b959a6af705a57b36eb86aa) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need @@ -146,8 +147,8 @@ if(SYCL_UR_USE_FETCH_CONTENT) ) fetch_adapter_source(cuda - https://github.com/frasercrmck/unified-runtime.git - 52918b62251fa9b61b959a6af705a57b36eb86aa + ${UNIFIED_RUNTIME_REPO} + ${UNIFIED_RUNTIME_TAG} ) fetch_adapter_source(hip From 7c5ed9f48b114837794ffb92d3bae9f415a76d70 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 21 Aug 2024 16:11:41 +0100 Subject: [PATCH 14/17] workaround unsupported --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 2 +- .../detail/error_handling/error_handling.cpp | 20 +++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 7b8fb1e97c983..5eead5838c3cf 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -124,7 +124,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) # Date: Mon Aug 19 16:20:45 2024 +0100 # Merge pull request #1984 from rafbiels/rafbiels/cuda-stream-race-cond # Fix race condition in CUDA stream creation - set(UNIFIED_RUNTIME_TAG 52918b62251fa9b61b959a6af705a57b36eb86aa) + set(UNIFIED_RUNTIME_TAG 8165d6d94a46a395d966be08faa014bd09802009) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need diff --git a/sycl/source/detail/error_handling/error_handling.cpp b/sycl/source/detail/error_handling/error_handling.cpp index b829e40eb0658..4435caa5f8833 100644 --- a/sycl/source/detail/error_handling/error_handling.cpp +++ b/sycl/source/detail/error_handling/error_handling.cpp @@ -104,14 +104,22 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, CompileWGSize, nullptr); size_t CompileMaxWGSize[3] = {0}; - Plugin->call(urKernelGetGroupInfo, Kernel, Device, - UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE, - sizeof(size_t) * 3, CompileMaxWGSize, nullptr); + ur_result_t URRes = + Plugin->call_nocheck(urKernelGetGroupInfo, Kernel, Device, + UR_KERNEL_GROUP_INFO_COMPILE_MAX_WORK_GROUP_SIZE, + sizeof(size_t) * 3, CompileMaxWGSize, nullptr); + if (URRes != UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION) { + Plugin->checkUrResult(URRes); + } size_t CompileMaxLinearWGSize = 0; - Plugin->call(urKernelGetGroupInfo, Kernel, Device, - UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE, - sizeof(size_t), &CompileMaxLinearWGSize, nullptr); + URRes = Plugin->call_nocheck( + urKernelGetGroupInfo, Kernel, Device, + UR_KERNEL_GROUP_INFO_COMPILE_MAX_LINEAR_WORK_GROUP_SIZE, sizeof(size_t), + &CompileMaxLinearWGSize, nullptr); + if (URRes != UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION) { + Plugin->checkUrResult(URRes); + } size_t MaxWGSize = 0; Plugin->call(urDeviceGetInfo, Device, UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE, From 91d632fb7632f1d8130663c703e5104f768cc8a4 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 22 Aug 2024 11:37:07 +0100 Subject: [PATCH 15/17] bump metadata size; bump UR --- llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 2 +- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index ffe008ba6e04d..853b1f3a364af 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -275,7 +275,7 @@ PropSetRegTy computeModuleProperties(const Module &M, KernelMaxWorkGroupSize); } - if (auto MaxLinearWGSize = getKernelSingleEltMetadata( + if (auto MaxLinearWGSize = getKernelSingleEltMetadata( Func, "max_linear_work_group_size")) { MetadataNames.push_back(Func.getName().str() + "@max_linear_work_group_size"); diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 5eead5838c3cf..2e0a1c03089e3 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -124,7 +124,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) # Date: Mon Aug 19 16:20:45 2024 +0100 # Merge pull request #1984 from rafbiels/rafbiels/cuda-stream-race-cond # Fix race condition in CUDA stream creation - set(UNIFIED_RUNTIME_TAG 8165d6d94a46a395d966be08faa014bd09802009) + set(UNIFIED_RUNTIME_TAG eb2d7d9455b7a7878154c0455e044a515aaa02f9) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need From 079948ba496ac8984f490f50618e12835a7b96b0 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 22 Aug 2024 16:05:30 +0100 Subject: [PATCH 16/17] bump --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 2 +- sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp | 2 +- sycl/test-e2e/Basic/max_work_group_size_props.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 2e0a1c03089e3..8a59530d3cc82 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -124,7 +124,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) # Date: Mon Aug 19 16:20:45 2024 +0100 # Merge pull request #1984 from rafbiels/rafbiels/cuda-stream-race-cond # Fix race condition in CUDA stream creation - set(UNIFIED_RUNTIME_TAG eb2d7d9455b7a7878154c0455e044a515aaa02f9) + set(UNIFIED_RUNTIME_TAG b595cbf4dd67f98d873f5514a22d041306a07b8f) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") # Due to the use of dependentloadflag and no installer for UMF and hwloc we need diff --git a/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp b/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp index cd5dca7db7084..7009ca367d8e9 100644 --- a/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp +++ b/sycl/test-e2e/Basic/max_linear_work_group_size_props.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out // This property is not yet supported by all UR adapters -// XFAIL: level_zero, opencl +// XFAIL: level_zero, opencl, hip #include diff --git a/sycl/test-e2e/Basic/max_work_group_size_props.cpp b/sycl/test-e2e/Basic/max_work_group_size_props.cpp index b159091176ca7..96439971d904a 100644 --- a/sycl/test-e2e/Basic/max_work_group_size_props.cpp +++ b/sycl/test-e2e/Basic/max_work_group_size_props.cpp @@ -2,7 +2,7 @@ // RUN: %{run} %t.out // This property is not yet supported by all UR adapters -// XFAIL: level_zero, opencl +// XFAIL: level_zero, opencl, hip #include From 1594d8ca2ce5a1ab914402a6c1f3af3d8b3b7516 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 27 Aug 2024 09:47:01 +0100 Subject: [PATCH 17/17] update docs --- ...sycl_ext_oneapi_kernel_properties.asciidoc | 104 +++++++++++------- 1 file changed, 62 insertions(+), 42 deletions(-) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc index 3847eb8671ef6..d0ae2a0727046 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_kernel_properties.asciidoc @@ -117,14 +117,10 @@ supports. === Kernel Properties -Most of the kernel properties below correspond to kernel attributes defined in +The kernel properties below correspond to kernel attributes defined in Section 5.8.1 of the SYCL 2020 specification. Note that deprecated attributes (such as `vec_type_hint`) are not included. -The `max_work_group_size` and `max_linear_work_group_size` kernel properties -are also provided as complements to other properties concerning work-group -sizes, without a corresponding function attribute form. - ```c++ namespace sycl { namespace ext { @@ -143,16 +139,6 @@ struct work_group_size_hint_key { using value_t = property_value...>; }; // work_group_size_hint_key -struct max_work_group_size_key { - template - using value_t = property_value...>; -}; // max_work_group_size_key - -struct max_linear_work_group_size_key { - template - using value_t = property_value>; -}; // max_linear_work_group_size_key - // Corresponds to reqd_sub_group_size struct sub_group_size_key { template @@ -189,12 +175,6 @@ inline constexpr work_group_size_key::value_t work_group_size; template inline constexpr work_group_size_hint_key::value_t work_group_size_hint; -template -inline constexpr max_work_group_size_key::value_t max_work_group_size; - -template -inline constexpr max_linear_work_group_size_key::value_t max_linear_work_group_size; - template inline constexpr sub_group_size_key::value_t sub_group_size; @@ -203,8 +183,6 @@ inline constexpr device_has_key::value_t device_has; template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; -template <> struct is_property_key : std::true_type {}; -template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; template <> struct is_property_key : std::true_type {}; @@ -234,6 +212,67 @@ template <> struct is_property_key : std::true_type {}; of the work-group used to invoke the kernel. The order of the template arguments matches the constructor of the `range` class. +|`sub_group_size` +|The `sub_group_size` property adds the requirement that the kernel must be + compiled and executed with the specified sub-group size. An implementation may + throw an exception for certain combinations of property values, devices and + launch configurations, as described for the `reqd_sub_group_size` attribute + in Table 180 of the SYCL 2020 specification. + +|`device_has` +|The `device_has` property adds the requirement that the kernel must be + launched on a device that has all of the aspects listed in the `Aspects` + parameter pack. An implementation may throw an exception or issue a + diagnostic for certain combinations of aspects, devices and kernel functions, + as described for the `device_has` attribute in Table 180 of the SYCL 2020 + specification. + +|=== + +SYCL implementations may introduce additional kernel properties. If any +combinations of kernel attributes are invalid, this must be clearly documented +as part of the new kernel property definition. + +=== Kernel Properties for the CUDA backend + +The kernel properties specified in this section may only be used to decorate +kernels that are submitted to the CUDA backend. Attempting to submit a kernel +with these properties to another backend has undefined behavior. + +```c++ +namespace sycl { +namespace ext { +namespace oneapi { +namespace experimental { + +struct max_work_group_size_key { + template + using value_t = property_value...>; +}; // max_work_group_size_key + +struct max_linear_work_group_size_key { + template + using value_t = property_value>; +}; // max_linear_work_group_size_key + +template +inline constexpr max_work_group_size_key::value_t max_work_group_size; + +template +inline constexpr max_linear_work_group_size_key::value_t max_linear_work_group_size; + +template <> struct is_property_key : std::true_type {}; +template <> struct is_property_key : std::true_type {}; + +} // namespace experimental +} // namespace oneapi +} // namespace ext +} // namespace sycl +``` + +|=== +|Property|Description + |`max_work_group_size` |The `max_work_group_size` property provides a promise to the compiler that the kernel will never be launched with a larger work-group than the @@ -254,27 +293,8 @@ If the kernel is submitted with an `nd_range` that exceeds the size specified by the property, the implementation must throw a synchronous exception with the `errc::nd_range` error code. -|`sub_group_size` -|The `sub_group_size` property adds the requirement that the kernel must be - compiled and executed with the specified sub-group size. An implementation may - throw an exception for certain combinations of property values, devices and - launch configurations, as described for the `reqd_sub_group_size` attribute - in Table 180 of the SYCL 2020 specification. - -|`device_has` -|The `device_has` property adds the requirement that the kernel must be - launched on a device that has all of the aspects listed in the `Aspects` - parameter pack. An implementation may throw an exception or issue a - diagnostic for certain combinations of aspects, devices and kernel functions, - as described for the `device_has` attribute in Table 180 of the SYCL 2020 - specification. - |=== -SYCL implementations may introduce additional kernel properties. If any -combinations of kernel attributes are invalid, this must be clearly documented -as part of the new kernel property definition. - === Adding a Property List to a Kernel Launch To enable properties to be associated with kernels, this extension adds