diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 7183784b5cfff..fdd987b43c2ae 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -670,6 +670,10 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, AttrMDArgs.push_back( llvm::ConstantAsMetadata::get(Builder.getInt(*XDimVal))); + for (auto i = AttrMDArgs.size(); i < 3; ++i) + AttrMDArgs.push_back( + llvm::ConstantAsMetadata::get(Builder.getInt(llvm::APInt(32, 1)))); + Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, AttrMDArgs)); } @@ -690,16 +694,28 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, std::optional ZDimVal = A->getZDimVal(); llvm::SmallVector AttrMDArgs; + llvm::APInt NumDims(32, 1); // X // On SYCL target the dimensions are reversed if present. - if (ZDimVal) + if (ZDimVal) { AttrMDArgs.push_back( llvm::ConstantAsMetadata::get(Builder.getInt(*ZDimVal))); - if (YDimVal) + ++NumDims; + } + if (YDimVal) { AttrMDArgs.push_back( llvm::ConstantAsMetadata::get(Builder.getInt(*YDimVal))); + ++NumDims; + } AttrMDArgs.push_back( llvm::ConstantAsMetadata::get(Builder.getInt(*XDimVal))); + for (auto i = NumDims.getZExtValue(); i < 3; ++i) + AttrMDArgs.push_back( + llvm::ConstantAsMetadata::get(Builder.getInt(llvm::APInt(32, 1)))); + + Fn->setMetadata("work_group_num_dim", + llvm::MDNode::get(Context, llvm::ConstantAsMetadata::get( + Builder.getInt(NumDims)))); Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, AttrMDArgs)); } diff --git a/clang/test/CodeGenSYCL/check-work-group-attributes-match.cpp b/clang/test/CodeGenSYCL/check-work-group-attributes-match.cpp index 3fae9e167c1a5..8ecc995dc566b 100644 --- a/clang/test/CodeGenSYCL/check-work-group-attributes-match.cpp +++ b/clang/test/CodeGenSYCL/check-work-group-attributes-match.cpp @@ -1,5 +1,9 @@ // RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple amdgcn-amd-amdhsa -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fno-sycl-force-inline-kernel-lambda -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -disable-llvm-passes -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s + // Tests that work_group_size_hint and reqd_work_group_size generate the same // metadata nodes for the same arguments. @@ -11,21 +15,24 @@ int main() { queue q; q.submit([&](handler &h) { - // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_1d() #0 {{.*}} !work_group_size_hint ![[WG1D:[0-9]+]]{{.*}} !reqd_work_group_size ![[WG1D]] + // CHECK: define {{.*}} void @{{.*}}kernel_1d() #0 {{.*}} !work_group_size_hint ![[WGSH1D:[0-9]+]]{{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]]{{.*}} !reqd_work_group_size ![[WGSH1D]] h.single_task([]() [[sycl::work_group_size_hint(8)]] [[sycl::reqd_work_group_size(8)]] {}); }); q.submit([&](handler &h) { - // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_2d() #0 {{.*}} !work_group_size_hint ![[WG2D:[0-9]+]]{{.*}} !reqd_work_group_size ![[WG2D]] + // CHECK: define {{.*}} void @{{.*}}kernel_2d() #0 {{.*}} !work_group_size_hint ![[WGSH2D:[0-9]+]]{{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]]{{.*}} !reqd_work_group_size ![[WGSH2D:[0-9]+]]{{.*}} h.single_task([]() [[sycl::work_group_size_hint(8, 16)]] [[sycl::reqd_work_group_size(8, 16)]] {}); }); q.submit([&](handler &h) { - // CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_3d() #0 {{.*}} !work_group_size_hint ![[WG3D:[0-9]+]]{{.*}} !reqd_work_group_size ![[WG3D]] + // CHECK: define {{.*}} void @{{.*}}kernel_3d() #0 {{.*}} !work_group_size_hint ![[WG3D:[0-9]+]]{{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]]{{.*}} !reqd_work_group_size ![[WG3D]] h.single_task([]() [[sycl::work_group_size_hint(8, 16, 32)]] [[sycl::reqd_work_group_size(8, 16, 32)]] {}); }); } -// CHECK: ![[WG1D]] = !{i32 8} -// CHECK: ![[WG2D]] = !{i32 16, i32 8} +// CHECK: ![[WGSH1D]] = !{i32 8, i32 1, i32 1} +// CHECK: ![[NDRWGS1D]] = !{i32 1} +// CHECK: ![[WGSH2D]] = !{i32 16, i32 8, i32 1} +// CHECK: ![[NDRWGS2D]] = !{i32 2} // CHECK: ![[WG3D]] = !{i32 32, i32 16, i32 8} +// CHECK: ![[NDRWGS3D]] = !{i32 3} diff --git a/clang/test/CodeGenSYCL/reqd-work-group-size.cpp b/clang/test/CodeGenSYCL/reqd-work-group-size.cpp index 2bb890b727f76..2eff74dc8776f 100644 --- a/clang/test/CodeGenSYCL/reqd-work-group-size.cpp +++ b/clang/test/CodeGenSYCL/reqd-work-group-size.cpp @@ -1,4 +1,7 @@ // RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -sycl-std=2017 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple amdgcn-amd-amdhsa -disable-llvm-passes -sycl-std=2017 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -disable-llvm-passes -sycl-std=2017 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -disable-llvm-passes -sycl-std=2017 -emit-llvm -o - %s | FileCheck %s #include "sycl.hpp" @@ -163,43 +166,46 @@ int main() { return 0; } -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name1() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D32:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name2() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D8:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name3() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D88:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name4() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D22:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name5() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D44:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name6() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D2:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name7() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D32]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name8() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D8]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name9() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D88]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name10() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D22]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name11() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D44]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name12() #0 {{.*}} !reqd_work_group_size ![[WGSIZE3D2]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name13() #0 {{.*}} !reqd_work_group_size ![[WGSIZE2D32:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name14() #0 {{.*}} !reqd_work_group_size ![[WGSIZE2D8:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name15() #0 {{.*}} !reqd_work_group_size ![[WGSIZE2D88:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name16() #0 {{.*}} !reqd_work_group_size ![[WGSIZE2D22:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name17() #0 {{.*}} !reqd_work_group_size ![[WGSIZE2D44:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name18() #0 {{.*}} !reqd_work_group_size ![[WGSIZE2D2:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name19() #0 {{.*}} !reqd_work_group_size ![[WGSIZE1D32:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name20() #0 {{.*}} !reqd_work_group_size ![[WGSIZE1D8:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name21() #0 {{.*}} !reqd_work_group_size ![[WGSIZE1D8]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name22() #0 {{.*}} !reqd_work_group_size ![[WGSIZE1D22:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name23() #0 {{.*}} !reqd_work_group_size ![[WGSIZE1D8]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}kernel_name24() #0 {{.*}} !reqd_work_group_size ![[WGSIZE1D2:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name1() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D32:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name2() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D8:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name3() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D88:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name4() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D22:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name5() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D44:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name6() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D2:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name7() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D32]] +// CHECK: define {{.*}} void @{{.*}}kernel_name8() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D8]] +// CHECK: define {{.*}} void @{{.*}}kernel_name9() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D88]] +// CHECK: define {{.*}} void @{{.*}}kernel_name10() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D22]] +// CHECK: define {{.*}} void @{{.*}}kernel_name11() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D44]] +// CHECK: define {{.*}} void @{{.*}}kernel_name12() #0 {{.*}} !work_group_num_dim ![[NDRWGS3D:[0-9]+]] !reqd_work_group_size ![[WGSIZE3D2]] +// CHECK: define {{.*}} void @{{.*}}kernel_name13() #0 {{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D32:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name14() #0 {{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D8:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name15() #0 {{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D88:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name16() #0 {{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D22:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name17() #0 {{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D44:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name18() #0 {{.*}} !work_group_num_dim ![[NDRWGS2D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D2_or_WGSIZE1D8:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name19() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D32:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name20() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D2_or_WGSIZE1D8]] +// CHECK: define {{.*}} void @{{.*}}kernel_name21() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D2_or_WGSIZE1D8]] +// CHECK: define {{.*}} void @{{.*}}kernel_name22() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D22:[0-9]+]] +// CHECK: define {{.*}} void @{{.*}}kernel_name23() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE2D2_or_WGSIZE1D8]] +// CHECK: define {{.*}} void @{{.*}}kernel_name24() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D2:[0-9]+]] + +// CHECK: ![[NDRWGS3D]] = !{i32 3} // CHECK: ![[WGSIZE3D32]] = !{i32 16, i32 16, i32 32} // CHECK: ![[WGSIZE3D8]] = !{i32 1, i32 1, i32 8} // CHECK: ![[WGSIZE3D88]] = !{i32 8, i32 8, i32 8} // CHECK: ![[WGSIZE3D22]] = !{i32 2, i32 2, i32 2} // CHECK: ![[WGSIZE3D44]] = !{i32 4, i32 4, i32 8} // CHECK: ![[WGSIZE3D2]] = !{i32 2, i32 8, i32 1} -// CHECK: ![[WGSIZE2D32]] = !{i32 16, i32 32} -// CHECK: ![[WGSIZE2D8]] = !{i32 1, i32 8} -// CHECK: ![[WGSIZE2D88]] = !{i32 8, i32 8} -// CHECK: ![[WGSIZE2D22]] = !{i32 2, i32 2} -// CHECK: ![[WGSIZE2D44]] = !{i32 4, i32 8} -// CHECK: ![[WGSIZE2D2]] = !{i32 8, i32 1} -// CHECK: ![[WGSIZE1D32]] = !{i32 32} -// CHECK: ![[WGSIZE1D8]] = !{i32 8} -// CHECK: ![[WGSIZE1D22]] = !{i32 2} -// CHECK: ![[WGSIZE1D2]] = !{i32 1} +// CHECK: ![[NDRWGS2D]] = !{i32 2} +// CHECK: ![[WGSIZE2D32]] = !{i32 16, i32 32, i32 1} +// CHECK: ![[WGSIZE2D8]] = !{i32 1, i32 8, i32 1} +// CHECK: ![[WGSIZE2D88]] = !{i32 8, i32 8, i32 1} +// CHECK: ![[WGSIZE2D22]] = !{i32 2, i32 2, i32 1} +// CHECK: ![[WGSIZE2D44]] = !{i32 4, i32 8, i32 1} +// CHECK: ![[WGSIZE2D2_or_WGSIZE1D8]] = !{i32 8, i32 1, i32 1} +// CHECK: ![[NDRWGS1D]] = !{i32 1} +// CHECK: ![[WGSIZE1D32]] = !{i32 32, i32 1, i32 1} +// CHECK: ![[WGSIZE1D22]] = !{i32 2, i32 1, i32 1} +// CHECK: ![[WGSIZE1D2]] = !{i32 1, i32 1, i32 1} diff --git a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h index 7ac19bd0f9f45..abb78b51af154 100644 --- a/llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h +++ b/llvm/include/llvm/SYCLLowerIR/SYCLDeviceRequirements.h @@ -33,6 +33,7 @@ struct SYCLDeviceRequirements { std::set Aspects; std::set FixedTarget; std::optional> ReqdWorkGroupSize; + std::optional WorkGroupNumDim; std::optional> JointMatrix; std::optional> JointMatrixMad; std::optional SubGroupSize; diff --git a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp index 92ff992141945..7b2e7ac0f978a 100644 --- a/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp +++ b/llvm/lib/SYCLLowerIR/ModuleSplitter.cpp @@ -982,6 +982,7 @@ getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, Categorizer.registerSimpleStringAttributeRule("sycl-grf-size"); Categorizer.registerListOfIntegersInMetadataSortedRule("sycl_used_aspects"); Categorizer.registerListOfIntegersInMetadataRule("reqd_work_group_size"); + Categorizer.registerListOfIntegersInMetadataRule("work_group_num_dim"); Categorizer.registerListOfIntegersInMetadataRule( "intel_reqd_sub_group_size"); Categorizer.registerSimpleStringAttributeRule( diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp index 6c0f1c952030b..311f20d86c906 100644 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp @@ -57,6 +57,12 @@ llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) { } } + if (auto *MDN = F.getMetadata("work_group_num_dim")) { + uint32_t WGND = ExtractUnsignedIntegerFromMDNodeOperand(MDN, 0); + if (!Reqs.ReqdWorkGroupSize.has_value()) + Reqs.WorkGroupNumDim = WGND; + } + if (auto *MDN = F.getMetadata("reqd_work_group_size")) { llvm::SmallVector NewReqdWorkGroupSize; for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) @@ -133,5 +139,8 @@ std::map SYCLDeviceRequirements::asMap() const { if (SubGroupSize.has_value()) Requirements["reqd_sub_group_size"] = *SubGroupSize; + if (WorkGroupNumDim.has_value()) + Requirements["work_group_num_dim"] = *WorkGroupNumDim; + return Requirements; } diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index e2bd06e0707bd..6c6db956c383a 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -347,6 +347,16 @@ bool isModuleUsingAsan(const Module &M) { return MDVal->getString() == "asan"; } +// Gets work_group_num_dim information for function Func, conviniently 0 if +// metadata is not present. +uint32_t getKernelWorkGroupNumDim(const Function &Func) { + MDNode *MaxDimMD = Func.getMetadata("work_group_num_dim"); + if (!MaxDimMD) + return 0; + assert(MaxDimMD->getNumOperands() == 1 && "Malformed node."); + return mdconst::extract(MaxDimMD->getOperand(0))->getZExtValue(); +} + // Gets reqd_work_group_size information for function Func. std::vector getKernelReqdWorkGroupSizeMetadata(const Function &Func) { MDNode *ReqdWorkGroupSizeMD = Func.getMetadata("reqd_work_group_size"); @@ -473,15 +483,23 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, SmallVector MetadataNames; if (GlobProps.EmitProgramMetadata) { - // Add reqd_work_group_size information to program metadata + // Add reqd_work_group_size and work_group_num_dim information to + // program metadata. for (const Function &Func : M.functions()) { std::vector KernelReqdWorkGroupSize = getKernelReqdWorkGroupSizeMetadata(Func); - if (KernelReqdWorkGroupSize.empty()) - continue; - MetadataNames.push_back(Func.getName().str() + "@reqd_work_group_size"); - PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), - KernelReqdWorkGroupSize); + if (!KernelReqdWorkGroupSize.empty()) { + MetadataNames.push_back(Func.getName().str() + "@reqd_work_group_size"); + PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), + KernelReqdWorkGroupSize); + } + + uint32_t WorkGroupNumDim = getKernelWorkGroupNumDim(Func); + if (WorkGroupNumDim) { + MetadataNames.push_back(Func.getName().str() + "@work_group_num_dim"); + PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), + WorkGroupNumDim); + } } // Add global_id_mapping information with mapping between device-global diff --git a/sycl/plugins/unified_runtime/CMakeLists.txt b/sycl/plugins/unified_runtime/CMakeLists.txt index b74b70bf87ada..2ca6c7c115f43 100644 --- a/sycl/plugins/unified_runtime/CMakeLists.txt +++ b/sycl/plugins/unified_runtime/CMakeLists.txt @@ -95,13 +95,13 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) endfunction() set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") - # commit 056d653264034e546d8b8f493e1d9f65c697829b - # Merge: b7c89302 bbb04b65 + # commit 8cdd099ae3d1a34d3bcd7cbed7f5745c3dc8e112 + # Merge: fc9bb61b c893a3c4 # Author: Kenneth Benzie (Benie) - # Date: Fri May 17 15:11:12 2024 +0100 - # Merge pull request #1512 from DBDuncan/duncan/fix_pi_mem_leak - # [Bindless][Exp] Remove phMem argument from bindless image creation functions - set(UNIFIED_RUNTIME_TAG 056d653264034e546d8b8f493e1d9f65c697829b) + # Date: Mon May 20 15:50:02 2024 +0100 + # Merge pull request #954 from jchlanda/jakub/rqwgs_hip + # [HIP] Handle required wg size attribute in HIP + set(UNIFIED_RUNTIME_TAG 8cdd099ae3d1a34d3bcd7cbed7f5745c3dc8e112) fetch_adapter_source(level_zero ${UNIFIED_RUNTIME_REPO} diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 442c004278e2a..c0b8ac875e67f 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2681,8 +2681,7 @@ checkDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img, const NDRDescT &NDRDesc) { auto getPropIt = [&Img](const std::string &PropName) { - const RTDeviceBinaryImage::PropertyRange &PropRange = - Img.getDeviceRequirements(); + auto &PropRange = Img.getDeviceRequirements(); RTDeviceBinaryImage::PropertyRange::ConstIterator PropIt = std::find_if( PropRange.begin(), PropRange.end(), [&PropName](RTDeviceBinaryImage::PropertyRange::ConstIterator &&Prop) { @@ -2700,6 +2699,7 @@ checkDevSupportDeviceRequirements(const device &Dev, auto ReqdWGSizeUint32TPropIt = getPropIt("reqd_work_group_size"); auto ReqdWGSizeUint64TPropIt = getPropIt("reqd_work_group_size_uint64_t"); auto ReqdSubGroupSizePropIt = getPropIt("reqd_sub_group_size"); + auto WorkGroupNumDim = getPropIt("work_group_num_dim"); // Checking if device supports defined aspects if (AspectsPropIt) { @@ -2796,7 +2796,23 @@ checkDevSupportDeviceRequirements(const device &Dev, Dims++; } - if (NDRDesc.Dims != 0 && NDRDesc.Dims != static_cast(Dims)) + size_t UserProvidedNumDims = 0; + if (WorkGroupNumDim) { + // We know the dimensions have been padded to 3, make sure that the pad + // value is always set to 1 and record the number of dimensions specified + // by the user. + UserProvidedNumDims = + DeviceBinaryProperty(*(WorkGroupNumDim.value())).asUint32(); +#ifndef NDEBUG + for (unsigned i = UserProvidedNumDims; i < 3; ++i) + assert(ReqdWGSizeVec[i] == 1 && + "Incorrect padding in required work-group size metadata."); +#endif // NDEBUG + } else { + UserProvidedNumDims = Dims; + } + + if (NDRDesc.Dims != 0 && NDRDesc.Dims != UserProvidedNumDims) return sycl::exception( sycl::errc::nd_range, "The local size dimension of submitted nd_range doesn't match the " diff --git a/sycl/test-e2e/Basic/reqd_work_group_size.cpp b/sycl/test-e2e/Basic/reqd_work_group_size.cpp index aa010d277993c..1510461f97828 100644 --- a/sycl/test-e2e/Basic/reqd_work_group_size.cpp +++ b/sycl/test-e2e/Basic/reqd_work_group_size.cpp @@ -1,8 +1,5 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// -// Failing negative test with HIP -// UNSUPPORTED: hip #include diff --git a/sycl/test-e2e/Basic/reqd_work_group_size_check_exception.cpp b/sycl/test-e2e/Basic/reqd_work_group_size_check_exception.cpp index 76a5739e9ee35..8d1fc0fe39030 100644 --- a/sycl/test-e2e/Basic/reqd_work_group_size_check_exception.cpp +++ b/sycl/test-e2e/Basic/reqd_work_group_size_check_exception.cpp @@ -1,8 +1,6 @@ // RUN: %clangxx -fsycl -fsycl-targets=%{sycl_triple} %s -o %t.out // RUN: %{run} %t.out -// UNSUPPORTED: hip - #include #define CHECK_INVALID_REQD_WORK_GROUP_SIZE(Dim, ...) \ diff --git a/sycl/test-e2e/Basic/reqd_work_group_size_matches_dimensions.cpp b/sycl/test-e2e/Basic/reqd_work_group_size_matches_dimensions.cpp index ac3961d94622a..9349b52d09cb9 100644 --- a/sycl/test-e2e/Basic/reqd_work_group_size_matches_dimensions.cpp +++ b/sycl/test-e2e/Basic/reqd_work_group_size_matches_dimensions.cpp @@ -1,9 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// https://github.com/intel/llvm/issues/9353 -// UNSUPPORTED: hip - #include "sycl/sycl.hpp" using namespace sycl;