From 397fcde8c9e40e1f1575c47485b6f79ef0e47ac8 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Wed, 3 Jul 2024 09:51:39 +0100 Subject: [PATCH 1/9] [SYCL] Add kernel properties for three function attributes This patch adds kernel properties for three existing SYCL function attributes: 'max_work_group_size', 'min_work_groups_per_cu', and 'max_work_groups_per_mp'. The semantics of the properties are the same as for their respective function attributes. Each of these attributes was originally earmarked as a SYCL equivalent to CUDA's '__launch_bounds__' qualifier, hence the focus on lowering for NVPTX. It has since been identified that we will need an additional property for CUDA's 'maxThreadsPerBlock' which isn't always expressable with 'max_work_group_size'. That property will come in a follow-up patch. This patch also wires up the 'max_work_group_size' property to the equivalent SPIR-V execution mode, which should hopefully improve certain use cases. It lowers the other two properties despite them not currently having a SPIR-V equivalent. --- clang/lib/CodeGen/Targets/NVPTX.cpp | 29 +++++++ .../SYCLLowerIR/CompileTimePropertiesPass.cpp | 52 ++++++++---- .../oneapi/kernel_properties/properties.hpp | 81 +++++++++++++++++++ .../sycl/ext/oneapi/properties/property.hpp | 5 +- .../properties_kernel_launch_bounds.cpp | 27 +++++++ .../properties_kernel_launch_bounds_nvptx.cpp | 27 +++++++ .../properties_kernel_max_work_group_size.cpp | 29 +++++++ ...rties_kernel_max_work_group_size_nvptx.cpp | 25 ++++++ .../properties/properties_kernel.cpp | 8 ++ 9 files changed, 267 insertions(+), 16 deletions(-) create mode 100644 sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp create mode 100644 sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp create mode 100644 sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp create mode 100644 sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index a1479c7269320..85249582ea87e 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -259,6 +259,22 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( addNVVMMetadata(F, "maxntidx", MaxThreads); HasMaxWorkGroupSize = true; } + } else if (auto Attr = F->getFnAttribute("sycl-max-work-group-size"); + Attr.isValid()) { + // Split values in the comma-separated list integers. + SmallVector ValStrs; + Attr.getValueAsString().split(ValStrs, ','); + assert(ValStrs.size() == 3 && "Must have all three dimensions for " + "sycl-max-work-group-size property"); + + static constexpr const char *Annots[] = {"maxntidx", "maxntidy", + "maxntidz"}; + for (auto [AnnotStr, ValStr] : zip(Annots, reverse(ValStrs))) { + int Value = 0; + bool Error = ValStr.getAsInteger(10, Value); + assert(!Error && "The attribute's value is not a number"); + addNVVMMetadata(F, AnnotStr, Value); + } } auto attrValue = [&](Expr *E) { @@ -278,6 +294,13 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( addNVVMMetadata(F, "minctasm", attrValue(MWGPCU->getValue())); HasMinWorkGroupPerCU = true; } + } else if (auto Attr = F->getFnAttribute("sycl-min-work-groups-per-cu"); + Attr.isValid()) { + // The value is guaranteed to be > 0, pass it to the metadata. + int Value = 0; + bool Error = Attr.getValueAsString().getAsInteger(10, Value); + assert(!Error && "The attribute's value is not a number"); + addNVVMMetadata(F, "minctasm", Value); } if (const auto *MWGPMP = @@ -291,6 +314,12 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( // The value is guaranteed to be > 0, pass it to the metadata. addNVVMMetadata(F, "maxclusterrank", attrValue(MWGPMP->getValue())); } + } else if (auto Attr = F->getFnAttribute("sycl-max-work-groups-per-mp"); + Attr.isValid()) { + int Value = 0; + bool Error = Attr.getValueAsString().getAsInteger(10, Value); + assert(!Error && "The attribute's value is not a number"); + addNVVMMetadata(F, "maxclusterrank", Value); } } diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index 36adf1e52ff56..9542997886f57 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -361,14 +361,22 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 64); } - if (AttrKindStr == "sycl-work-group-size" || - AttrKindStr == "sycl-work-group-size-hint") { + static constexpr std::pair SimpleWGAttrs[] = { + {"sycl-work-group-size", "reqd_work_group_size"}, + {"sycl-work-group-size-hint", "work_group_size_hint"}, + {"sycl-max-work-group-size", "max_work_group_size"}, + }; + + for (auto [AttrKind, MDStr] : SimpleWGAttrs) { + if (AttrKindStr != AttrKind) + continue; // Split values in the comma-separated list integers. SmallVector ValStrs; Attr.getValueAsString().split(ValStrs, ','); assert(ValStrs.size() <= 3 && - "sycl-work-group-size and sycl-work-group-size-hint currently only " + "sycl-work-group-size, sycl-work-group-size-hint and " + "sycl-max-work-group-size currently only " "support up to three values"); // SYCL work-group sizes must be reversed for SPIR-V. @@ -385,20 +393,24 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { MDVals.push_back(ConstantAsMetadata::get( Constant::getIntegerValue(SizeTTy, APInt(SizeTBitSize, ValStr, 10)))); - const char *MDName = (AttrKindStr == "sycl-work-group-size") - ? "reqd_work_group_size" - : "work_group_size_hint"; - return std::pair(MDName, MDNode::get(Ctx, MDVals)); + return std::pair(MDStr, MDNode::get(Ctx, MDVals)); } - if (AttrKindStr == "sycl-sub-group-size") { - uint32_t SubGroupSize = getAttributeAsInteger(Attr); - IntegerType *Ty = Type::getInt32Ty(Ctx); - Metadata *MDVal = ConstantAsMetadata::get( - Constant::getIntegerValue(Ty, APInt(32, SubGroupSize))); - SmallVector MD{MDVal}; - return std::pair("intel_reqd_sub_group_size", - MDNode::get(Ctx, MD)); + static constexpr std::pair SimpleI32Attrs[] = { + {"sycl-sub-group-size", "intel_reqd_sub_group_size"}, + {"sycl-min-work-groups-per-cu", "min_work_groups_per_cu"}, + {"sycl-max-work-groups-per-mp", "max_work_groups_per_mp"}, + }; + + for (auto [AttrKind, MDStr] : SimpleI32Attrs) { + if (AttrKindStr == AttrKind) { + uint32_t SubGroupSize = getAttributeAsInteger(Attr); + IntegerType *Ty = Type::getInt32Ty(Ctx); + Metadata *MDVal = ConstantAsMetadata::get( + Constant::getIntegerValue(Ty, APInt(32, SubGroupSize))); + SmallVector MD{MDVal}; + return std::pair(MDStr, MDNode::get(Ctx, MD)); + } } // The sycl-single-task attribute currently only has an effect when targeting @@ -478,6 +490,16 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { MDNode::get(Ctx, AttrMDArgs)); } + if (AttrKindStr == "sycl-max-work-group-size") { + uint32_t SubGroupSize = getAttributeAsInteger(Attr); + IntegerType *Ty = Type::getInt32Ty(Ctx); + Metadata *MDVal = ConstantAsMetadata::get( + Constant::getIntegerValue(Ty, APInt(32, SubGroupSize))); + SmallVector MD{MDVal}; + return std::pair("max_work_group_size", + MDNode::get(Ctx, MD)); + } + return std::nullopt; } diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index e46ab88c43172..57352d3c1e6ac 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -71,6 +71,27 @@ struct single_task_kernel_key { using value_t = property_value; }; +struct max_work_group_size_key + : detail::compile_time_property_key { + template + using value_t = property_value...>; +}; + +struct min_work_groups_per_cu_key + : detail::compile_time_property_key { + template + using value_t = property_value>; +}; + +struct max_work_groups_per_mp_key + : detail::compile_time_property_key { + template + using value_t = property_value>; +}; + template struct property_value, std::integral_constant...> { @@ -138,6 +159,42 @@ template <> struct property_value { using key_t = single_task_kernel_key; }; +template +struct property_value> { + static_assert( + Size != 0, + "max_work_group_size_key property must contain a non-zero value."); + + using key_t = max_work_group_size_key; + using value_t = std::integral_constant; + static constexpr uint32_t value = Size; +}; + +template +struct property_value> { + static_assert( + Size != 0, + "min_work_groups_per_cu_key property must contain a non-zero value."); + + using key_t = min_work_groups_per_cu_key; + using value_t = std::integral_constant; + static constexpr uint32_t value = Size; +}; + +template +struct property_value> { + static_assert( + Size != 0, + "max_work_groups_per_mp_key property must contain a non-zero value."); + + using key_t = max_work_groups_per_mp_key; + using value_t = std::integral_constant; + static constexpr uint32_t value = Size; +}; + template inline constexpr work_group_size_key::value_t work_group_size; @@ -156,6 +213,15 @@ inline constexpr nd_range_kernel_key::value_t nd_range_kernel; inline constexpr single_task_kernel_key::value_t single_task_kernel; +template +inline constexpr max_work_group_size_key::value_t max_work_group_size; + +template +inline constexpr min_work_groups_per_cu_key::value_t min_work_groups_per_cu; + +template +inline constexpr max_work_groups_per_mp_key::value_t max_work_groups_per_mp; + struct work_group_progress_key : detail::compile_time_property_key { template struct PropertyMetaInfo { static constexpr const char *name = "sycl-single-task-kernel"; static constexpr int value = 0; }; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-max-work-group-size"; + static constexpr const char *value = SizeListToStr::value; +}; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-min-work-groups-per-cu"; + static constexpr uint32_t value = Size; +}; +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-max-work-groups-per-mp"; + static constexpr uint32_t value = Size; +}; template struct HasKernelPropertiesGetMethod : std::false_type {}; diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index 50b385f3f0cab..38b3ee3eaecf0 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -211,8 +211,11 @@ enum PropKind : uint32_t { OutputDataPlacement = 70, IncludeFiles = 71, RegisteredKernelNames = 72, + MaxWorkGroupSize = 73, + MinWorkGroupsPerCU = 74, + MaxWorkGroupsPerMP = 75, // PropKindSize must always be the last value. - PropKindSize = 73, + PropKindSize = 76, }; struct property_key_base_tag {}; diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp new file mode 100644 index 0000000000000..5cd9d11f97471 --- /dev/null +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp @@ -0,0 +1,27 @@ +// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics + +#include + +int main() { + sycl::queue Q; + + constexpr auto Props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::min_work_groups_per_cu<8>, + sycl::ext::oneapi::experimental::max_work_groups_per_mp<4>, + }; + // CHECK-IR: spir_kernel void @{{.*}}LaunchBoundsKernel(){{.*}} #[[LaunchBoundsAttrs:[0-9]+]] + // CHECK-IR-SAME: !max_work_groups_per_mp ![[MaxWGsPerMPMD:[0-9]+]] + // CHECK-IR-SAME: !min_work_groups_per_cu ![[MinWGsPerCUMD:[0-9]+]] + Q.single_task(Props, []() {}); + + return 0; +} + +// CHECK-IR: attributes #[[LaunchBoundsAttrs]] = { +// CHECK-IR-SAME: "sycl-max-work-groups-per-mp"="4" +// CHECK-IR-SAME: "sycl-min-work-groups-per-cu"="8" + +// CHECK-IR: ![[MaxWGsPerMPMD]] = !{i32 4} +// CHECK-IR: ![[MinWGsPerCUMD]] = !{i32 8} diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp new file mode 100644 index 0000000000000..f26591d2fe6af --- /dev/null +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp @@ -0,0 +1,27 @@ +// REQUIRES: cuda + +// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR + +#include + +int main() { + sycl::queue Q; + + constexpr auto Props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::min_work_groups_per_cu<8>, + sycl::ext::oneapi::experimental::max_work_groups_per_mp<4>, + }; + + // CHECK-IR: define{{.*}}void @[[LaunchBoundsKernelFn:.*LaunchBoundsKernel0]](){{.*}} #[[LaunchBoundsAttrs:[0-9]+]] + Q.single_task(Props, []() {}); + + return 0; +} + +// CHECK-IR: attributes #[[LaunchBoundsAttrs]] = { +// CHECK-IR-SAME: "sycl-max-work-groups-per-mp"="4" +// CHECK-IR-SAME: "sycl-min-work-groups-per-cu"="8" + +// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"kernel", i32 1} +// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"minctasm", i32 8} +// CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"maxclusterrank", i32 4} diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp new file mode 100644 index 0000000000000..218747f85af82 --- /dev/null +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size.cpp @@ -0,0 +1,29 @@ +// RUN: %clangxx -fsycl-device-only -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR +// RUN: %clangxx -fsycl -fsyntax-only -Xclang -verify %s +// expected-no-diagnostics + +#include + +int main() { + sycl::queue Q; + sycl::event Ev; + + constexpr auto Props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size<8, 4, 2>}; + + // CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel0(){{.*}} #[[MaxWGSizeAttr1:[0-9]+]] + // CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD1:[0-9]+]] + Q.single_task(Props, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel1(){{.*}} #[[MaxWGSizeAttr1]] + // CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD1]] + Q.single_task(Ev, Props, []() {}); + // CHECK-IR: spir_kernel void @{{.*}}MaxWGSizeKernel2(){{.*}} #[[MaxWGSizeAttr1]] + // CHECK-IR-SAME: !max_work_group_size ![[MaxWGSizeMD1]] + Q.single_task({Ev}, Props, []() {}); + + return 0; +} + +// CHECK-IR: attributes #[[MaxWGSizeAttr1]] = { {{.*}}"sycl-max-work-group-size"="8,4,2" + +// CHECK-IR: ![[MaxWGSizeMD1]] = !{i64 2, i64 4, i64 8} diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp new file mode 100644 index 0000000000000..e2d4b66cd8f1d --- /dev/null +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_max_work_group_size_nvptx.cpp @@ -0,0 +1,25 @@ +// REQUIRES: cuda + +// RUN: %clangxx -fsycl-device-only -fsycl-targets=nvptx64-nvidia-cuda -S -Xclang -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-IR + +#include + +int main() { + sycl::queue Q; + + constexpr auto Props = sycl::ext::oneapi::experimental::properties{ + sycl::ext::oneapi::experimental::max_work_group_size<8, 4, 2>}; + + // CHECK-IR: define{{.*}}void @[[MaxWGSizeKernelFn:.*MaxWGSizeKernel0]](){{.*}} #[[MaxWGSizeAttr1:[0-9]+]] + Q.single_task(Props, []() {}); + + return 0; +} + +// CHECK-IR: attributes #[[MaxWGSizeAttr1]] = { +// CHECK-IR-SAME: "sycl-max-work-group-size"="8,4,2" + +// CHECK-IR-DAG: !{ptr @[[MaxWGSizeKernelFn]], !"kernel", i32 1} +// CHECK-IR-DAG: !{ptr @[[MaxWGSizeKernelFn]], !"maxntidx", i32 2} +// CHECK-IR-DAG: !{ptr @[[MaxWGSizeKernelFn]], !"maxntidy", i32 4} +// CHECK-IR-DAG: !{ptr @[[MaxWGSizeKernelFn]], !"maxntidz", i32 8} diff --git a/sycl/test/extensions/properties/properties_kernel.cpp b/sycl/test/extensions/properties/properties_kernel.cpp index 3868c23f7535c..18a6bb4a345d3 100644 --- a/sycl/test/extensions/properties/properties_kernel.cpp +++ b/sycl/test/extensions/properties/properties_kernel.cpp @@ -50,6 +50,8 @@ int main() { static_assert( is_property_value)>::value); static_assert(is_property_value)>::value); + static_assert(is_property_value)>::value); + static_assert(is_property_value)>::value); static_assert( std::is_same_v)::key_t>); @@ -66,6 +68,10 @@ int main() { decltype(work_group_size_hint<13, 13, 13>)::key_t>); static_assert( std::is_same_v)::key_t>); + static_assert(std::is_same_v)::key_t>); + static_assert(std::is_same_v)::key_t>); static_assert(work_group_size<15>[0] == 15); static_assert(work_group_size<16, 17>[0] == 16); @@ -80,6 +86,8 @@ int main() { static_assert(work_group_size_hint<24, 25, 26>[1] == 25); static_assert(work_group_size_hint<24, 25, 26>[2] == 26); static_assert(sub_group_size<27>.value == 27); + static_assert(min_work_groups_per_cu<28>.value == 28); + static_assert(max_work_groups_per_mp<29>.value == 29); static_assert(std::is_same_v)::value_t, std::integral_constant>); From a677fae17a99b56f910011a7506d90b86e174de8 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 4 Jul 2024 15:59:02 +0100 Subject: [PATCH 2/9] remove unnecessary comment --- clang/lib/CodeGen/Targets/NVPTX.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 85249582ea87e..116e78759cb00 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -296,7 +296,6 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( } } else if (auto Attr = F->getFnAttribute("sycl-min-work-groups-per-cu"); Attr.isValid()) { - // The value is guaranteed to be > 0, pass it to the metadata. int Value = 0; bool Error = Attr.getValueAsString().getAsInteger(10, Value); assert(!Error && "The attribute's value is not a number"); From f9df938f310318aae8b4d43f28dbfbcaaa20dafe Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 4 Jul 2024 16:10:03 +0100 Subject: [PATCH 3/9] fix formatting; fix max_work_group_size property value; test it --- .../oneapi/kernel_properties/properties.hpp | 27 ++++++++++++------- .../properties/properties_kernel.cpp | 8 ++++++ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index 57352d3c1e6ac..22aa1c5bfdfea 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -159,16 +159,22 @@ template <> struct property_value { using key_t = single_task_kernel_key; }; -template +template struct property_value> { + std::integral_constant, + std::integral_constant...> { + static_assert(sizeof...(Dims) + 1 <= 3, + "max_work_group_size property currently " + "only supports exactly three values."); static_assert( - Size != 0, - "max_work_group_size_key property must contain a non-zero value."); + detail::AllNonZero::value, + "max_work_group_size property must only contain non-zero values."); using key_t = max_work_group_size_key; - using value_t = std::integral_constant; - static constexpr uint32_t value = Size; + + constexpr size_t operator[](int Dim) const { + return std::array{Dim0, Dims...}[Dim]; + } }; template @@ -214,13 +220,16 @@ inline constexpr nd_range_kernel_key::value_t nd_range_kernel; inline constexpr single_task_kernel_key::value_t single_task_kernel; template -inline constexpr max_work_group_size_key::value_t max_work_group_size; +inline constexpr max_work_group_size_key::value_t + max_work_group_size; template -inline constexpr min_work_groups_per_cu_key::value_t min_work_groups_per_cu; +inline constexpr min_work_groups_per_cu_key::value_t + min_work_groups_per_cu; template -inline constexpr max_work_groups_per_mp_key::value_t max_work_groups_per_mp; +inline constexpr max_work_groups_per_mp_key::value_t + max_work_groups_per_mp; struct work_group_progress_key : detail::compile_time_property_key { diff --git a/sycl/test/extensions/properties/properties_kernel.cpp b/sycl/test/extensions/properties/properties_kernel.cpp index 18a6bb4a345d3..e1ae45ab619b5 100644 --- a/sycl/test/extensions/properties/properties_kernel.cpp +++ b/sycl/test/extensions/properties/properties_kernel.cpp @@ -50,6 +50,8 @@ int main() { static_assert( is_property_value)>::value); static_assert(is_property_value)>::value); + static_assert( + is_property_value)>::value); static_assert(is_property_value)>::value); static_assert(is_property_value)>::value); @@ -68,6 +70,9 @@ int main() { decltype(work_group_size_hint<13, 13, 13>)::key_t>); static_assert( std::is_same_v)::key_t>); + static_assert( + std::is_same_v)::key_t>); static_assert(std::is_same_v)::key_t>); static_assert(std::is_same_v[1] == 25); static_assert(work_group_size_hint<24, 25, 26>[2] == 26); static_assert(sub_group_size<27>.value == 27); + static_assert(max_work_group_size<28, 29, 30>[0] == 28); + static_assert(max_work_group_size<28, 29, 30>[1] == 29); + static_assert(max_work_group_size<28, 29, 30>[2] == 30); static_assert(min_work_groups_per_cu<28>.value == 28); static_assert(max_work_groups_per_mp<29>.value == 29); From 781aeeecebe1d93a37391ad6e3fc503f0fc33e62 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 4 Jul 2024 16:17:41 +0100 Subject: [PATCH 4/9] make assertion do as it says --- sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index 22aa1c5bfdfea..07afb4f3d2834 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -163,7 +163,7 @@ template struct property_value, std::integral_constant...> { - static_assert(sizeof...(Dims) + 1 <= 3, + static_assert(sizeof...(Dims) + 1 == 3, "max_work_group_size property currently " "only supports exactly three values."); static_assert( From f30b59452372be54826529fa919a61142701a629 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 4 Jul 2024 17:34:56 +0100 Subject: [PATCH 5/9] remove unused code --- llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index 9542997886f57..b30e14c7f157b 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -490,16 +490,6 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { MDNode::get(Ctx, AttrMDArgs)); } - if (AttrKindStr == "sycl-max-work-group-size") { - uint32_t SubGroupSize = getAttributeAsInteger(Attr); - IntegerType *Ty = Type::getInt32Ty(Ctx); - Metadata *MDVal = ConstantAsMetadata::get( - Constant::getIntegerValue(Ty, APInt(32, SubGroupSize))); - SmallVector MD{MDVal}; - return std::pair("max_work_group_size", - MDNode::get(Ctx, MD)); - } - return std::nullopt; } From 323d1d0f92f5e431cd4bdacdd87006111c3507ab Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 8 Jul 2024 10:39:30 +0100 Subject: [PATCH 6/9] bounds check --- .../SYCLLowerIR/CompileTimePropertiesPass.cpp | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index b30e14c7f157b..58cf3f057a0ab 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -361,26 +361,29 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { AddFPControlMetadataForWidth(SPIRV_DENORM_PRESERVE, 64); } - static constexpr std::pair SimpleWGAttrs[] = { - {"sycl-work-group-size", "reqd_work_group_size"}, - {"sycl-work-group-size-hint", "work_group_size_hint"}, - {"sycl-max-work-group-size", "max_work_group_size"}, - }; - - for (auto [AttrKind, MDStr] : SimpleWGAttrs) { + static constexpr std::tuple + SimpleWGAttrs[] = { + {"sycl-work-group-size", "reqd_work_group_size", + /*requiresAll3Dims*/ false}, + {"sycl-work-group-size-hint", "work_group_size_hint", + /*requiresAll3Dims*/ false}, + {"sycl-max-work-group-size", "max_work_group_size", + /*requiresAll3Dims*/ true}, + }; + + for (auto &[AttrKind, MDStr, Req3D] : SimpleWGAttrs) { if (AttrKindStr != AttrKind) continue; // Split values in the comma-separated list integers. - SmallVector ValStrs; - Attr.getValueAsString().split(ValStrs, ','); + SmallVector AttrValStrs; + Attr.getValueAsString().split(AttrValStrs, ','); - assert(ValStrs.size() <= 3 && - "sycl-work-group-size, sycl-work-group-size-hint and " - "sycl-max-work-group-size currently only " - "support up to three values"); + assert(((Req3D && AttrValStrs.size() == 3) || + (!Req3D && AttrValStrs.size() <= 3)) && + "Incorrect number of values for kernel property"); // SYCL work-group sizes must be reversed for SPIR-V. - std::reverse(ValStrs.begin(), ValStrs.end()); + std::reverse(AttrValStrs.begin(), AttrValStrs.end()); // Use integer pointer size as closest analogue to size_t. IntegerType *IntPtrTy = DLayout.getIntPtrType(Ctx); @@ -389,7 +392,7 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { // Get the integers from the strings. SmallVector MDVals; - for (StringRef ValStr : ValStrs) + for (StringRef ValStr : AttrValStrs) MDVals.push_back(ConstantAsMetadata::get( Constant::getIntegerValue(SizeTTy, APInt(SizeTBitSize, ValStr, 10)))); From 749693572a6d4eb817e0213410a48892630a69de Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 8 Jul 2024 10:40:28 +0100 Subject: [PATCH 7/9] update comment --- llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index 58cf3f057a0ab..a0d04b515c8ff 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -364,11 +364,11 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { static constexpr std::tuple SimpleWGAttrs[] = { {"sycl-work-group-size", "reqd_work_group_size", - /*requiresAll3Dims*/ false}, + /*RequiresAll3Dims*/ false}, {"sycl-work-group-size-hint", "work_group_size_hint", - /*requiresAll3Dims*/ false}, + /*RequiresAll3Dims*/ false}, {"sycl-max-work-group-size", "max_work_group_size", - /*requiresAll3Dims*/ true}, + /*RequiresAll3Dims*/ true}, }; for (auto &[AttrKind, MDStr, Req3D] : SimpleWGAttrs) { From 60da7cdbfb97b37072222b85e3178f077c5d97da Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Mon, 8 Jul 2024 10:48:25 +0100 Subject: [PATCH 8/9] feedback: property doesn't need parameter pack --- .../oneapi/kernel_properties/properties.hpp | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index 07afb4f3d2834..6f7b080387127 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -73,9 +73,11 @@ struct single_task_kernel_key { struct max_work_group_size_key : detail::compile_time_property_key { - template + template using value_t = property_value...>; + std::integral_constant, + std::integral_constant, + std::integral_constant>; }; struct min_work_groups_per_cu_key @@ -159,21 +161,19 @@ template <> struct property_value { using key_t = single_task_kernel_key; }; -template +template struct property_value, - std::integral_constant...> { - static_assert(sizeof...(Dims) + 1 == 3, - "max_work_group_size property currently " - "only supports exactly three values."); + std::integral_constant, + std::integral_constant> { static_assert( - detail::AllNonZero::value, + detail::AllNonZero::value, "max_work_group_size property must only contain non-zero values."); using key_t = max_work_group_size_key; constexpr size_t operator[](int Dim) const { - return std::array{Dim0, Dims...}[Dim]; + return std::array{Dim0, Dim1, Dim2}[Dim]; } }; @@ -219,8 +219,8 @@ inline constexpr nd_range_kernel_key::value_t nd_range_kernel; inline constexpr single_task_kernel_key::value_t single_task_kernel; -template -inline constexpr max_work_group_size_key::value_t +template +inline constexpr max_work_group_size_key::value_t max_work_group_size; template @@ -345,10 +345,10 @@ template <> struct PropertyMetaInfo { static constexpr const char *name = "sycl-single-task-kernel"; static constexpr int value = 0; }; -template -struct PropertyMetaInfo> { +template +struct PropertyMetaInfo> { static constexpr const char *name = "sycl-max-work-group-size"; - static constexpr const char *value = SizeListToStr::value; + static constexpr const char *value = SizeListToStr::value; }; template struct PropertyMetaInfo> { From a10c350d4d7f4e804c6592e8a2c707aea6ed3d0b Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 9 Jul 2024 12:34:21 +0100 Subject: [PATCH 9/9] rename properties --- clang/lib/CodeGen/Targets/NVPTX.cpp | 6 ++- .../SYCLLowerIR/CompileTimePropertiesPass.cpp | 5 +- .../oneapi/kernel_properties/properties.hpp | 48 +++++++++---------- .../sycl/ext/oneapi/properties/property.hpp | 4 +- .../properties_kernel_launch_bounds.cpp | 16 +++---- .../properties_kernel_launch_bounds_nvptx.cpp | 8 ++-- .../properties/properties_kernel.cpp | 20 ++++---- 7 files changed, 57 insertions(+), 50 deletions(-) diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 428e4119bbde1..ac90175c73f65 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -294,7 +294,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( addNVVMMetadata(F, "minctasm", attrValue(MWGPCU->getValue())); HasMinWorkGroupPerCU = true; } - } else if (auto Attr = F->getFnAttribute("sycl-min-work-groups-per-cu"); + } else if (auto Attr = + F->getFnAttribute("sycl-min-work-groups-per-multiprocessor"); Attr.isValid()) { int Value = 0; bool Error = Attr.getValueAsString().getAsInteger(10, Value); @@ -313,7 +314,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( // The value is guaranteed to be > 0, pass it to the metadata. addNVVMMetadata(F, "maxclusterrank", attrValue(MWGPMP->getValue())); } - } else if (auto Attr = F->getFnAttribute("sycl-max-work-groups-per-mp"); + } else if (auto Attr = + F->getFnAttribute("sycl-max-work-groups-per-cluster"); Attr.isValid()) { int Value = 0; bool Error = Attr.getValueAsString().getAsInteger(10, Value); diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index a0d04b515c8ff..99721393f17f3 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -401,8 +401,9 @@ attributeToExecModeMetadata(const Attribute &Attr, Function &F) { static constexpr std::pair SimpleI32Attrs[] = { {"sycl-sub-group-size", "intel_reqd_sub_group_size"}, - {"sycl-min-work-groups-per-cu", "min_work_groups_per_cu"}, - {"sycl-max-work-groups-per-mp", "max_work_groups_per_mp"}, + {"sycl-min-work-groups-per-multiprocessor", + "min_work_groups_per_multiprocessor"}, + {"sycl-max-work-groups-per-cluster", "max_work_groups_per_cluster"}, }; for (auto [AttrKind, MDStr] : SimpleI32Attrs) { diff --git a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp index 6f7b080387127..7fea0970ce26e 100644 --- a/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp +++ b/sycl/include/sycl/ext/oneapi/kernel_properties/properties.hpp @@ -80,17 +80,19 @@ struct max_work_group_size_key std::integral_constant>; }; -struct min_work_groups_per_cu_key - : detail::compile_time_property_key { +struct min_work_groups_per_multiprocessor_key + : detail::compile_time_property_key< + detail::PropKind::MinWorkGroupsPerMultiprocessor> { template - using value_t = property_value>; }; -struct max_work_groups_per_mp_key - : detail::compile_time_property_key { +struct max_work_groups_per_cluster_key + : detail::compile_time_property_key< + detail::PropKind::MaxWorkGroupsPerCluster> { template - using value_t = property_value>; }; @@ -178,25 +180,23 @@ struct property_value -struct property_value> { - static_assert( - Size != 0, - "min_work_groups_per_cu_key property must contain a non-zero value."); + static_assert(Size != 0, "min_work_groups_per_multiprocessor_key property " + "must contain a non-zero value."); - using key_t = min_work_groups_per_cu_key; + using key_t = min_work_groups_per_multiprocessor_key; using value_t = std::integral_constant; static constexpr uint32_t value = Size; }; template -struct property_value> { - static_assert( - Size != 0, - "max_work_groups_per_mp_key property must contain a non-zero value."); + static_assert(Size != 0, "max_work_groups_per_cluster_key property must " + "contain a non-zero value."); - using key_t = max_work_groups_per_mp_key; + using key_t = max_work_groups_per_cluster_key; using value_t = std::integral_constant; static constexpr uint32_t value = Size; }; @@ -224,12 +224,12 @@ inline constexpr max_work_group_size_key::value_t max_work_group_size; template -inline constexpr min_work_groups_per_cu_key::value_t - min_work_groups_per_cu; +inline constexpr min_work_groups_per_multiprocessor_key::value_t + min_work_groups_per_multiprocessor; template -inline constexpr max_work_groups_per_mp_key::value_t - max_work_groups_per_mp; +inline constexpr max_work_groups_per_cluster_key::value_t + max_work_groups_per_cluster; struct work_group_progress_key : detail::compile_time_property_key { @@ -351,13 +351,13 @@ struct PropertyMetaInfo> { static constexpr const char *value = SizeListToStr::value; }; template -struct PropertyMetaInfo> { - static constexpr const char *name = "sycl-min-work-groups-per-cu"; +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-min-work-groups-per-multiprocessor"; static constexpr uint32_t value = Size; }; template -struct PropertyMetaInfo> { - static constexpr const char *name = "sycl-max-work-groups-per-mp"; +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-max-work-groups-per-cluster"; static constexpr uint32_t value = Size; }; diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index fc6fbba8d1acb..4b15d6b45339c 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -213,8 +213,8 @@ enum PropKind : uint32_t { RegisteredKernelNames = 72, ClusterLaunch = 73, MaxWorkGroupSize = 74, - MinWorkGroupsPerCU = 75, - MaxWorkGroupsPerMP = 76, + MinWorkGroupsPerMultiprocessor = 75, + MaxWorkGroupsPerCluster = 76, // PropKindSize must always be the last value. PropKindSize = 77, }; diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp index 5cd9d11f97471..76ba1fcdd843f 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds.cpp @@ -8,20 +8,20 @@ int main() { sycl::queue Q; constexpr auto Props = sycl::ext::oneapi::experimental::properties{ - sycl::ext::oneapi::experimental::min_work_groups_per_cu<8>, - sycl::ext::oneapi::experimental::max_work_groups_per_mp<4>, + sycl::ext::oneapi::experimental::min_work_groups_per_multiprocessor<8>, + sycl::ext::oneapi::experimental::max_work_groups_per_cluster<4>, }; // CHECK-IR: spir_kernel void @{{.*}}LaunchBoundsKernel(){{.*}} #[[LaunchBoundsAttrs:[0-9]+]] - // CHECK-IR-SAME: !max_work_groups_per_mp ![[MaxWGsPerMPMD:[0-9]+]] - // CHECK-IR-SAME: !min_work_groups_per_cu ![[MinWGsPerCUMD:[0-9]+]] + // CHECK-IR-SAME: !max_work_groups_per_cluster ![[MaxWGsPerCMD:[0-9]+]] + // CHECK-IR-SAME: !min_work_groups_per_multiprocessor ![[MinWGsPerMPMD:[0-9]+]] Q.single_task(Props, []() {}); return 0; } // CHECK-IR: attributes #[[LaunchBoundsAttrs]] = { -// CHECK-IR-SAME: "sycl-max-work-groups-per-mp"="4" -// CHECK-IR-SAME: "sycl-min-work-groups-per-cu"="8" +// CHECK-IR-SAME: "sycl-max-work-groups-per-cluster"="4" +// CHECK-IR-SAME: "sycl-min-work-groups-per-multiprocessor"="8" -// CHECK-IR: ![[MaxWGsPerMPMD]] = !{i32 4} -// CHECK-IR: ![[MinWGsPerCUMD]] = !{i32 8} +// CHECK-IR: ![[MaxWGsPerCMD]] = !{i32 4} +// CHECK-IR: ![[MinWGsPerMPMD]] = !{i32 8} diff --git a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp index f26591d2fe6af..4a137afd0d56a 100644 --- a/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp +++ b/sycl/test/check_device_code/extensions/properties/properties_kernel_launch_bounds_nvptx.cpp @@ -8,8 +8,8 @@ int main() { sycl::queue Q; constexpr auto Props = sycl::ext::oneapi::experimental::properties{ - sycl::ext::oneapi::experimental::min_work_groups_per_cu<8>, - sycl::ext::oneapi::experimental::max_work_groups_per_mp<4>, + sycl::ext::oneapi::experimental::min_work_groups_per_multiprocessor<8>, + sycl::ext::oneapi::experimental::max_work_groups_per_cluster<4>, }; // CHECK-IR: define{{.*}}void @[[LaunchBoundsKernelFn:.*LaunchBoundsKernel0]](){{.*}} #[[LaunchBoundsAttrs:[0-9]+]] @@ -19,8 +19,8 @@ int main() { } // CHECK-IR: attributes #[[LaunchBoundsAttrs]] = { -// CHECK-IR-SAME: "sycl-max-work-groups-per-mp"="4" -// CHECK-IR-SAME: "sycl-min-work-groups-per-cu"="8" +// CHECK-IR-SAME: "sycl-max-work-groups-per-cluster"="4" +// CHECK-IR-SAME: "sycl-min-work-groups-per-multiprocessor"="8" // CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"kernel", i32 1} // CHECK-IR-DAG: !{ptr @[[LaunchBoundsKernelFn]], !"minctasm", i32 8} diff --git a/sycl/test/extensions/properties/properties_kernel.cpp b/sycl/test/extensions/properties/properties_kernel.cpp index e1ae45ab619b5..a1389090c42e6 100644 --- a/sycl/test/extensions/properties/properties_kernel.cpp +++ b/sycl/test/extensions/properties/properties_kernel.cpp @@ -52,8 +52,10 @@ int main() { static_assert(is_property_value)>::value); static_assert( is_property_value)>::value); - static_assert(is_property_value)>::value); - static_assert(is_property_value)>::value); + static_assert(is_property_value< + decltype(min_work_groups_per_multiprocessor<8>)>::value); + static_assert( + is_property_value)>::value); static_assert( std::is_same_v)::key_t>); @@ -73,10 +75,12 @@ int main() { static_assert( std::is_same_v)::key_t>); - static_assert(std::is_same_v)::key_t>); - static_assert(std::is_same_v)::key_t>); + static_assert( + std::is_same_v)::key_t>); + static_assert( + std::is_same_v)::key_t>); static_assert(work_group_size<15>[0] == 15); static_assert(work_group_size<16, 17>[0] == 16); @@ -94,8 +98,8 @@ int main() { static_assert(max_work_group_size<28, 29, 30>[0] == 28); static_assert(max_work_group_size<28, 29, 30>[1] == 29); static_assert(max_work_group_size<28, 29, 30>[2] == 30); - static_assert(min_work_groups_per_cu<28>.value == 28); - static_assert(max_work_groups_per_mp<29>.value == 29); + static_assert(min_work_groups_per_multiprocessor<28>.value == 28); + static_assert(max_work_groups_per_cluster<29>.value == 29); static_assert(std::is_same_v)::value_t, std::integral_constant>);