diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 0bfbdb0d6e66a..093910b7caa88 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -303,6 +303,37 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( addNVVMMetadata(F, "maxntidz", MWGS->getXDimVal()); } + if (const auto *RWGS = FD->getAttr()) { + llvm::SmallVector, 3> Ops; + // Index-flip and pad out any missing elements. Note the misleading + // nomenclature of the methods: getXDimVal doesn't return the X dimension; + // it returns the left-most dimension (dim0). This could correspond to + // CUDA's X, Y, or Z, depending on the number of operands provided. + if (auto Dim0 = RWGS->getXDimVal()) + Ops.push_back(Dim0->getExtValue()); + if (auto Dim1 = RWGS->getYDimVal()) + Ops.push_back(Dim1->getExtValue()); + if (auto Dim2 = RWGS->getZDimVal()) + Ops.push_back(Dim2->getExtValue()); + std::reverse(Ops.begin(), Ops.end()); + Ops.append(3 - Ops.size(), std::nullopt); + + // Work-group sizes (in NVVM annotations) must be positive and no greater + // than INT32_MAX, whereas SYCL can allow for larger work-group sizes (see + // -fno-sycl-id-queries-fit-in-int). If any dimension is too large for + // NVPTX, don't emit any annotation at all. 
+ if (llvm::all_of(Ops, [](std::optional V) { + return !V || llvm::isUInt<31>(*V); + })) { + if (auto X = Ops[0]) + addNVVMMetadata(F, "reqntidx", *X); + if (auto Y = Ops[1]) + addNVVMMetadata(F, "reqntidy", *Y); + if (auto Z = Ops[2]) + addNVVMMetadata(F, "reqntidz", *Z); + } + } + auto attrValue = [&](Expr *E) { const auto *CE = cast(E); std::optional Val = CE->getResultAsAPSInt(); diff --git a/clang/test/CodeGenSYCL/reqd-work-group-size.cpp b/clang/test/CodeGenSYCL/reqd-work-group-size.cpp index 542655a94ac3a..ddb28c3fac1d5 100644 --- a/clang/test/CodeGenSYCL/reqd-work-group-size.cpp +++ b/clang/test/CodeGenSYCL/reqd-work-group-size.cpp @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NVPTX +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple nvptx64-nvidia-cuda -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NVPTX #include "sycl.hpp" @@ -123,6 +123,55 @@ int main() { // CHECK: define {{.*}} void @{{.*}}kernel_name22() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D22:[0-9]+]] // CHECK: define {{.*}} void @{{.*}}kernel_name24() #0 {{.*}} !work_group_num_dim ![[NDRWGS1D:[0-9]+]] !reqd_work_group_size ![[WGSIZE1D2:[0-9]+]] +// CHECK-NVPTX: = !{ptr 
@{{.*}}kernel_name1, !"reqntidx", i32 16} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name1, !"reqntidy", i32 16} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name1, !"reqntidz", i32 32} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name3, !"reqntidx", i32 8} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name3, !"reqntidy", i32 8} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name3, !"reqntidz", i32 8} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name4, !"reqntidx", i32 2} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name4, !"reqntidy", i32 2} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name4, !"reqntidz", i32 2} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name6, !"reqntidx", i32 2} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name6, !"reqntidy", i32 8} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name6, !"reqntidz", i32 1} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name7, !"reqntidx", i32 16} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name7, !"reqntidy", i32 16} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name7, !"reqntidz", i32 32} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name9, !"reqntidx", i32 8} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name9, !"reqntidy", i32 8} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name9, !"reqntidz", i32 8} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name10, !"reqntidx", i32 2} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name10, !"reqntidy", i32 2} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name10, !"reqntidz", i32 2} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name12, !"reqntidx", i32 2} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name12, !"reqntidy", i32 8} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name12, !"reqntidz", i32 1} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name13, !"reqntidx", i32 16} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name13, !"reqntidy", i32 32} +// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name13, !"reqntidz" +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name15, !"reqntidx", i32 8} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name15, !"reqntidy", i32 8} +// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name15, !"reqntidz" +// CHECK-NVPTX: = !{ptr 
@{{.*}}kernel_name16, !"reqntidx", i32 2} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name16, !"reqntidy", i32 2} +// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name16, !"reqntidz" +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name18, !"reqntidx", i32 8} +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name18, !"reqntidy", i32 1} +// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name18, !"reqntidz" +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name19, !"reqntidx", i32 32} +// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name19, !"reqntidy", +// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name19, !"reqntidz", +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name21, !"reqntidx", i32 8} +// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name21, !"reqntidy", +// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name21, !"reqntidz", +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name22, !"reqntidx", i32 2} +// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name22, !"reqntidy", +// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name22, !"reqntidz", +// CHECK-NVPTX: = !{ptr @{{.*}}kernel_name24, !"reqntidx", i32 1} +// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name24, !"reqntidy", +// CHECK-NVPTX-NOT: = !{ptr @{{.*}}kernel_name24, !"reqntidz", + // CHECK: ![[NDRWGS3D]] = !{i32 3} // CHECK: ![[WGSIZE3D32]] = !{i32 16, i32 16, i32 32} // CHECK: ![[WGSIZE3D88]] = !{i32 8, i32 8, i32 8}