From 8d5c50f46e1e527a256633c6a3328ed8e79544fc Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Fri, 20 Dec 2024 23:09:28 +0000 Subject: [PATCH] [NVPTX] Switch front-ends and tests to ptx_kernel cc --- clang/lib/CodeGen/Targets/NVPTX.cpp | 39 +++-- clang/test/CodeGen/nvptx_attributes.c | 8 +- clang/test/CodeGenCUDA/device-fun-linkage.cu | 8 +- clang/test/CodeGenCUDA/grid-constant.cu | 8 +- clang/test/CodeGenCUDA/offload_via_llvm.cu | 4 +- clang/test/CodeGenCUDA/ptx-kernels.cu | 7 +- clang/test/CodeGenCUDA/usual-deallocators.cu | 4 +- clang/test/CodeGenOpenCL/ptx-calls.cl | 4 +- clang/test/CodeGenOpenCL/ptx-kernels.cl | 4 +- clang/test/CodeGenOpenCL/reflect.cl | 10 +- clang/test/Headers/gpuintrin.c | 2 +- .../Target/NVPTX/NVPTXCtorDtorLowering.cpp | 18 +-- llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 6 +- .../UniformityAnalysis/NVPTX/daorder.ll | 5 +- .../UniformityAnalysis/NVPTX/diverge.ll | 16 +- .../NVPTX/hidden_diverge.ll | 5 +- .../UniformityAnalysis/NVPTX/irreducible.ll | 4 +- llvm/test/CodeGen/NVPTX/b52037.ll | 5 +- llvm/test/CodeGen/NVPTX/bug21465.ll | 6 +- llvm/test/CodeGen/NVPTX/bug22322.ll | 5 +- llvm/test/CodeGen/NVPTX/bug26185.ll | 13 +- .../CodeGen/NVPTX/call-with-alloca-buffer.ll | 6 +- llvm/test/CodeGen/NVPTX/cluster-dim.ll | 7 +- llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll | 6 +- llvm/test/CodeGen/NVPTX/i1-array-global.ll | 6 +- llvm/test/CodeGen/NVPTX/i1-ext-load.ll | 6 +- llvm/test/CodeGen/NVPTX/i1-global.ll | 6 +- llvm/test/CodeGen/NVPTX/i1-param.ll | 6 +- llvm/test/CodeGen/NVPTX/intr-range.ll | 18 +-- llvm/test/CodeGen/NVPTX/kernel-param-align.ll | 8 +- .../NVPTX/load-with-non-coherent-cache.ll | 59 +++---- llvm/test/CodeGen/NVPTX/local-stack-frame.ll | 4 +- llvm/test/CodeGen/NVPTX/lower-alloca.ll | 4 +- .../CodeGen/NVPTX/lower-args-gridconstant.ll | 84 +++++----- llvm/test/CodeGen/NVPTX/lower-args.ll | 13 +- llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 150 ++++++++---------- llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll | 6 +- .../CodeGen/NVPTX/lower-kernel-ptr-arg.ll | 10 +- llvm/test/CodeGen/NVPTX/maxclusterrank.ll | 5 +- .../CodeGen/NVPTX/noduplicate-syncthreads.ll | 5 +- llvm/test/CodeGen/NVPTX/noreturn.ll | 9 +- llvm/test/CodeGen/NVPTX/nvcl-param-align.ll | 5 +- llvm/test/CodeGen/NVPTX/refl1.ll | 6 +- llvm/test/CodeGen/NVPTX/reg-copy.ll | 6 +- llvm/test/CodeGen/NVPTX/simple-call.ll | 8 +- llvm/test/CodeGen/NVPTX/surf-read-cuda.ll | 14 +- llvm/test/CodeGen/NVPTX/surf-read.ll | 7 +- llvm/test/CodeGen/NVPTX/surf-tex.py | 36 ++--- llvm/test/CodeGen/NVPTX/surf-write-cuda.ll | 10 +- llvm/test/CodeGen/NVPTX/surf-write.ll | 7 +- llvm/test/CodeGen/NVPTX/tex-read-cuda.ll | 13 +- llvm/test/CodeGen/NVPTX/tex-read.ll | 5 +- llvm/test/CodeGen/NVPTX/unreachable.ll | 5 +- llvm/test/DebugInfo/NVPTX/debug-addr-class.ll | 4 +- llvm/test/DebugInfo/NVPTX/debug-info.ll | 8 +- .../LoopStrengthReduce/NVPTX/trunc.ll | 4 +- .../NVPTX/speculative-slsr.ll | 6 +- .../Dialect/NVVM/NVVMToLLVMIRTranslation.cpp | 10 +- mlir/test/Target/LLVMIR/nvvmir.mlir | 29 ++-- 59 files changed, 305 insertions(+), 477 deletions(-) diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp index 0431d2cc4ddc3..b82e4ddb9f3f2 100644 --- a/clang/lib/CodeGen/Targets/NVPTX.cpp +++ b/clang/lib/CodeGen/Targets/NVPTX.cpp @@ -9,6 +9,7 @@ #include "ABIInfoImpl.h" #include "TargetInfo.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/IntrinsicsNVPTX.h" using namespace clang; @@ -79,13 +80,11 @@ class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo { // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the // resulting MDNode to the nvvm.annotations MDNode. static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, - int Operand, - const SmallVectorImpl &GridConstantArgs); + int Operand); - static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name, - int Operand) { - addNVVMMetadata(GV, Name, Operand, SmallVector(0)); - } + static void + addGridConstantNVVMMetadata(llvm::GlobalValue *GV, + const SmallVectorImpl &GridConstantArgs); private: static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst, @@ -259,7 +258,7 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( if (FD->hasAttr()) { // OpenCL __kernel functions get kernel metadata // Create !{, metadata !"kernel", i32 1} node - addNVVMMetadata(F, "kernel", 1); + F->setCallingConv(llvm::CallingConv::PTX_Kernel); // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); } @@ -277,7 +276,8 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( // For some reason arg indices are 1-based in NVVM GCI.push_back(IV.index() + 1); // Create !{, metadata !"kernel", i32 1} node - addNVVMMetadata(F, "kernel", 1, GCI); + F->setCallingConv(llvm::CallingConv::PTX_Kernel); + addGridConstantNVVMMetadata(F, GCI); } if (CUDALaunchBoundsAttr *Attr = FD->getAttr()) M.handleCUDALaunchBoundsAttr(F, Attr); @@ -285,13 +285,12 @@ void NVPTXTargetCodeGenInfo::setTargetAttributes( // Attach kernel metadata directly if compiling for NVPTX. if (FD->hasAttr()) { - addNVVMMetadata(F, "kernel", 1); + F->setCallingConv(llvm::CallingConv::PTX_Kernel); } } -void NVPTXTargetCodeGenInfo::addNVVMMetadata( - llvm::GlobalValue *GV, StringRef Name, int Operand, - const SmallVectorImpl &GridConstantArgs) { +void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV, + StringRef Name, int Operand) { llvm::Module *M = GV->getParent(); llvm::LLVMContext &Ctx = M->getContext(); @@ -302,6 +301,21 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata( llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))}; + + // Append metadata to nvvm.annotations + MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); +} + +void NVPTXTargetCodeGenInfo::addGridConstantNVVMMetadata( + llvm::GlobalValue *GV, const SmallVectorImpl &GridConstantArgs) { + + llvm::Module *M = GV->getParent(); + llvm::LLVMContext &Ctx = M->getContext(); + + // Get "nvvm.annotations" metadata node + llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); + + SmallVector MDVals = {llvm::ConstantAsMetadata::get(GV)}; if (!GridConstantArgs.empty()) { SmallVector GCM; for (int I : GridConstantArgs) @@ -310,6 +324,7 @@ void NVPTXTargetCodeGenInfo::addNVVMMetadata( MDVals.append({llvm::MDString::get(Ctx, "grid_constant"), llvm::MDNode::get(Ctx, GCM)}); } + // Append metadata to nvvm.annotations MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } diff --git a/clang/test/CodeGen/nvptx_attributes.c b/clang/test/CodeGen/nvptx_attributes.c index 7dbd9f1321e28..8b9f3a2c18a1d 100644 --- a/clang/test/CodeGen/nvptx_attributes.c +++ b/clang/test/CodeGen/nvptx_attributes.c @@ -10,8 +10,14 @@ // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RET_ADDR]], align 8 // CHECK-NEXT: store i32 1, ptr [[TMP0]], align 4 // CHECK-NEXT: ret void +// __attribute__((nvptx_kernel)) void foo(int *ret) { *ret = 1; } -// CHECK: !0 = !{ptr @foo, !"kernel", i32 1} +//. +// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx32,+sm_61" } +//. +// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +//. diff --git a/clang/test/CodeGenCUDA/device-fun-linkage.cu b/clang/test/CodeGenCUDA/device-fun-linkage.cu index 54899e0e9c0f1..bdac62d1d03e8 100644 --- a/clang/test/CodeGenCUDA/device-fun-linkage.cu +++ b/clang/test/CodeGenCUDA/device-fun-linkage.cu @@ -17,8 +17,8 @@ template __device__ void func(); // RDC: define weak_odr void @_Z4funcIiEvv() template __global__ void kernel(); -// NORDC: define void @_Z6kernelIiEvv() -// RDC: define weak_odr void @_Z6kernelIiEvv() +// NORDC: define ptx_kernel void @_Z6kernelIiEvv() +// RDC: define weak_odr ptx_kernel void @_Z6kernelIiEvv() // Ensure that unused static device function is eliminated static __device__ void static_func() {} @@ -28,5 +28,5 @@ static __device__ void static_func() {} // Ensure that kernel function has external or weak_odr // linkage regardless static specifier static __global__ void static_kernel() {} -// NORDC: define void @_ZL13static_kernelv() -// RDC: define weak_odr void @_ZL13static_kernelv[[FILEID:.*]]() +// NORDC: define ptx_kernel void @_ZL13static_kernelv() +// RDC: define weak_odr ptx_kernel void @_ZL13static_kernelv[[FILEID:.*]]() diff --git a/clang/test/CodeGenCUDA/grid-constant.cu b/clang/test/CodeGenCUDA/grid-constant.cu index 8d4be9c9dc7e1..e7000cab3cda5 100644 --- a/clang/test/CodeGenCUDA/grid-constant.cu +++ b/clang/test/CodeGenCUDA/grid-constant.cu @@ -21,11 +21,11 @@ void foo() { } //. //. -// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"kernel", i32 1, !"grid_constant", [[META1:![0-9]+]]} +// CHECK: [[META0:![0-9]+]] = !{ptr @_Z6kernel1Sii, !"grid_constant", [[META1:![0-9]+]]} // CHECK: [[META1]] = !{i32 1, i32 3} -// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3:![0-9]+]]} +// CHECK: [[META2:![0-9]+]] = !{ptr @_Z13tkernel_constIK1SEvT_, !"grid_constant", [[META3:![0-9]+]]} // CHECK: [[META3]] = !{i32 1} -// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"kernel", i32 1, !"grid_constant", [[META3]]} -// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"kernel", i32 1, !"grid_constant", [[META6:![0-9]+]]} +// CHECK: [[META4:![0-9]+]] = !{ptr @_Z13tkernel_constI1SEvT_, !"grid_constant", [[META3]]} +// CHECK: [[META5:![0-9]+]] = !{ptr @_Z7tkernelIK1SEviT_, !"grid_constant", [[META6:![0-9]+]]} // CHECK: [[META6]] = !{i32 2} //. diff --git a/clang/test/CodeGenCUDA/offload_via_llvm.cu b/clang/test/CodeGenCUDA/offload_via_llvm.cu index 434eba99c1795..62942d8dc0755 100644 --- a/clang/test/CodeGenCUDA/offload_via_llvm.cu +++ b/clang/test/CodeGenCUDA/offload_via_llvm.cu @@ -7,7 +7,7 @@ #define __OFFLOAD_VIA_LLVM__ 1 #include "Inputs/cuda.h" -// HST-LABEL: define dso_local void @_Z18__device_stub__fooisPvS_( +// HST-LABEL: define dso_local ptx_kernel void @_Z18__device_stub__fooisPvS_( // HST-SAME: i32 noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], ptr noundef [[TMP2:%.*]], ptr noundef [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] { // HST-NEXT: [[ENTRY:.*:]] // HST-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 @@ -50,7 +50,7 @@ // HST: [[SETUP_END]]: // HST-NEXT: ret void // -// DEV-LABEL: define dso_local void @_Z3fooisPvS_( +// DEV-LABEL: define dso_local ptx_kernel void @_Z3fooisPvS_( // DEV-SAME: i32 noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], ptr noundef [[TMP2:%.*]], ptr noundef [[TMP3:%.*]]) #[[ATTR0:[0-9]+]] { // DEV-NEXT: [[ENTRY:.*:]] // DEV-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 diff --git a/clang/test/CodeGenCUDA/ptx-kernels.cu b/clang/test/CodeGenCUDA/ptx-kernels.cu index b7172b7736929..a7d5e11bd496f 100644 --- a/clang/test/CodeGenCUDA/ptx-kernels.cu +++ b/clang/test/CodeGenCUDA/ptx-kernels.cu @@ -10,7 +10,7 @@ extern "C" __device__ void device_function() {} -// CHECK-LABEL: define{{.*}} void @global_function +// CHECK-LABEL: define{{.*}} ptx_kernel void @global_function extern "C" __global__ void global_function() { // CHECK: call void @device_function @@ -19,7 +19,7 @@ __global__ void global_function() { // Make sure host-instantiated kernels are preserved on device side. template __global__ void templated_kernel(T param) {} -// CHECK-DAG: define{{.*}} void @_Z16templated_kernelIiEvT_( +// CHECK-DAG: define{{.*}} ptx_kernel void @_Z16templated_kernelIiEvT_( namespace { __global__ void anonymous_ns_kernel() {} @@ -30,6 +30,3 @@ void host_function() { templated_kernel<<<0, 0>>>(0); anonymous_ns_kernel<<<0,0>>>(); } - -// CHECK: !{{[0-9]+}} = !{ptr @global_function, !"kernel", i32 1} -// CHECK: !{{[0-9]+}} = !{ptr @_Z16templated_kernelIiEvT_, !"kernel", i32 1} diff --git a/clang/test/CodeGenCUDA/usual-deallocators.cu b/clang/test/CodeGenCUDA/usual-deallocators.cu index b85a706813fc2..64560efb74135 100644 --- a/clang/test/CodeGenCUDA/usual-deallocators.cu +++ b/clang/test/CodeGenCUDA/usual-deallocators.cu @@ -109,7 +109,7 @@ __host__ __device__ void tests_hd(void *t) { } // Make sure that we've generated the kernel used by A::~A. -// DEVICE-LABEL: define void @_Z1fIiEvT_ +// DEVICE-LABEL: define ptx_kernel void @_Z1fIiEvT_ // Make sure we've picked deallocator for the correct side of compilation. @@ -147,5 +147,3 @@ __host__ __device__ void tests_hd(void *t) { // COMMON-LABEL: define linkonce_odr void @_ZN8H1H2D1D2dlEPv(ptr noundef %0) // DEVICE: call void @dev_fn() // HOST: call void @host_fn() - -// DEVICE: !0 = !{ptr @_Z1fIiEvT_, !"kernel", i32 1} diff --git a/clang/test/CodeGenOpenCL/ptx-calls.cl b/clang/test/CodeGenOpenCL/ptx-calls.cl index 0081152ae40e0..ae187173b1730 100644 --- a/clang/test/CodeGenOpenCL/ptx-calls.cl +++ b/clang/test/CodeGenOpenCL/ptx-calls.cl @@ -7,7 +7,5 @@ void device_function() { __kernel void kernel_function() { device_function(); } -// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function() +// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function() // CHECK: call void @device_function() -// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1} - diff --git a/clang/test/CodeGenOpenCL/ptx-kernels.cl b/clang/test/CodeGenOpenCL/ptx-kernels.cl index 210e5682ac721..eac0df4abfbea 100644 --- a/clang/test/CodeGenOpenCL/ptx-kernels.cl +++ b/clang/test/CodeGenOpenCL/ptx-kernels.cl @@ -6,6 +6,4 @@ void device_function() { __kernel void kernel_function() { } -// CHECK-LABEL: define{{.*}} spir_kernel void @kernel_function() - -// CHECK: !{{[0-9]+}} = !{ptr @kernel_function, !"kernel", i32 1} +// CHECK-LABEL: define{{.*}} ptx_kernel void @kernel_function() diff --git a/clang/test/CodeGenOpenCL/reflect.cl b/clang/test/CodeGenOpenCL/reflect.cl index 9ae4a5f027d35..f5b618f6a35d3 100644 --- a/clang/test/CodeGenOpenCL/reflect.cl +++ b/clang/test/CodeGenOpenCL/reflect.cl @@ -12,8 +12,8 @@ bool device_function() { return __nvvm_reflect("__CUDA_ARCH") >= 700; } -// CHECK-LABEL: define dso_local spir_kernel void @kernel_function( -// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { +// CHECK-LABEL: define dso_local ptx_kernel void @kernel_function( +// CHECK-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 4 // CHECK-NEXT: store ptr addrspace(1) [[I]], ptr [[I_ADDR]], align 4 @@ -26,3 +26,9 @@ bool device_function() { __kernel void kernel_function(__global int *i) { *i = device_function(); } +//. +// CHECK: [[META3]] = !{i32 1} +// CHECK: [[META4]] = !{!"none"} +// CHECK: [[META5]] = !{!"int*"} +// CHECK: [[META6]] = !{!""} +//. diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c index 2e45f73692f53..281339716c3ed 100644 --- a/clang/test/Headers/gpuintrin.c +++ b/clang/test/Headers/gpuintrin.c @@ -44,7 +44,7 @@ // AMDGPU-NEXT: call void @__gpu_exit() #[[ATTR8:[0-9]+]] // AMDGPU-NEXT: unreachable // -// NVPTX-LABEL: define protected void @foo( +// NVPTX-LABEL: define protected ptx_kernel void @foo( // NVPTX-SAME: ) #[[ATTR0:[0-9]+]] { // NVPTX-NEXT: [[ENTRY:.*:]] // NVPTX-NEXT: [[CALL:%.*]] = call i32 @__gpu_num_blocks_x() #[[ATTR6:[0-9]+]] diff --git a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp index f940dc05948b3..c03ef8d33220c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp @@ -14,6 +14,7 @@ #include "MCTargetDesc/NVPTXBaseInfo.h" #include "NVPTX.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" @@ -49,39 +50,34 @@ static std::string getHash(StringRef Str) { return llvm::utohexstr(Hash.low(), /*LowerCase=*/true); } -static void addKernelMetadata(Module &M, GlobalValue *GV) { +static void addKernelMetadata(Module &M, Function *F) { llvm::LLVMContext &Ctx = M.getContext(); // Get "nvvm.annotations" metadata node. llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); - llvm::Metadata *KernelMDVals[] = { - llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "kernel"), - llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; - // This kernel is only to be called single-threaded. llvm::Metadata *ThreadXMDVals[] = { - llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidx"), + llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidx"), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; llvm::Metadata *ThreadYMDVals[] = { - llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidy"), + llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidy"), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; llvm::Metadata *ThreadZMDVals[] = { - llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, "maxntidz"), + llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxntidz"), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; llvm::Metadata *BlockMDVals[] = { - llvm::ConstantAsMetadata::get(GV), + llvm::ConstantAsMetadata::get(F), llvm::MDString::get(Ctx, "maxclusterrank"), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))}; // Append metadata to nvvm.annotations. - MD->addOperand(llvm::MDNode::get(Ctx, KernelMDVals)); + F->setCallingConv(CallingConv::PTX_Kernel); MD->addOperand(llvm::MDNode::get(Ctx, ThreadXMDVals)); MD->addOperand(llvm::MDNode::get(Ctx, ThreadYMDVals)); MD->addOperand(llvm::MDNode::get(Ctx, ThreadZMDVals)); diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index 98bffd92a087b..0f2bec711b249 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -311,11 +311,13 @@ std::optional getMaxNReg(const Function &F) { } bool isKernelFunction(const Function &F) { + if (F.getCallingConv() == CallingConv::PTX_Kernel) + return true; + if (const auto X = findOneNVVMAnnotation(&F, "kernel")) return (*X == 1); - // There is no NVVM metadata, check the calling convention - return F.getCallingConv() == CallingConv::PTX_Kernel; + return false; } MaybeAlign getAlign(const Function &F, unsigned Index) { diff --git a/llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll index 89d8c5aa90ab1..14f33d79b471d 100644 --- a/llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll +++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/daorder.ll @@ -3,7 +3,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" -define i32 @daorder(i32 %n) { +define ptx_kernel i32 @daorder(i32 %n) { ; CHECK-LABEL: for function 'daorder' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() @@ -43,6 +43,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() declare i32 @llvm.nvvm.read.ptx.sreg.tid.z() declare i32 @llvm.nvvm.read.ptx.sreg.laneid() - -!nvvm.annotations = !{!0} -!0 = !{ptr @daorder, !"kernel", i32 1} diff --git a/llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll index 0ac1b5f541471..cf8ffadcd073c 100644 --- a/llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll +++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/diverge.ll @@ -4,7 +4,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" ; return (n < 0 ? a + threadIdx.x : b + threadIdx.x) -define i32 @no_diverge(i32 %n, i32 %a, i32 %b) { +define ptx_kernel i32 @no_diverge(i32 %n, i32 %a, i32 %b) { ; CHECK-LABEL: for function 'no_diverge' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() @@ -27,7 +27,7 @@ merge: ; if (threadIdx.x < 5) // divergent: data dependent ; c = b; ; return c; // c is divergent: sync dependent -define i32 @sync(i32 %a, i32 %b) { +define ptx_kernel i32 @sync(i32 %a, i32 %b) { ; CHECK-LABEL: for function 'sync' bb1: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() @@ -49,7 +49,7 @@ bb3: ; } ; // c here is divergent because it is sync dependent on threadIdx.x >= 5 ; return c; -define i32 @mixed(i32 %n, i32 %a, i32 %b) { +define ptx_kernel i32 @mixed(i32 %n, i32 %a, i32 %b) { ; CHECK-LABEL: for function 'mixed' bb1: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.z() @@ -101,7 +101,7 @@ merge: ; return i == 10 ? 0 : 1; // i here is divergent ; ; The i defined in the loop is used outside. -define i32 @loop() { +define ptx_kernel i32 @loop() { ; CHECK-LABEL: for function 'loop' entry: %laneid = call i32 @llvm.nvvm.read.ptx.sreg.laneid() @@ -149,7 +149,7 @@ else: } ; Verifies sync-dependence is computed correctly in the absense of loops. -define i32 @sync_no_loop(i32 %arg) { +define ptx_kernel i32 @sync_no_loop(i32 %arg) { ; CHECK-LABEL: for function 'sync_no_loop' entry: %0 = add i32 %arg, 1 @@ -174,9 +174,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() declare i32 @llvm.nvvm.read.ptx.sreg.tid.z() declare i32 @llvm.nvvm.read.ptx.sreg.laneid() -!nvvm.annotations = !{!0, !1, !2, !3, !4} -!0 = !{ptr @no_diverge, !"kernel", i32 1} -!1 = !{ptr @sync, !"kernel", i32 1} -!2 = !{ptr @mixed, !"kernel", i32 1} -!3 = !{ptr @loop, !"kernel", i32 1} -!4 = !{ptr @sync_no_loop, !"kernel", i32 1} diff --git a/llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll index e319211771c0c..65512bf572f83 100644 --- a/llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll +++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/hidden_diverge.ll @@ -3,7 +3,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-nvidia-cuda" -define i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) { +define ptx_kernel i32 @hidden_diverge(i32 %n, i32 %a, i32 %b) { ; CHECK-LABEL: for function 'hidden_diverge' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() @@ -27,6 +27,3 @@ merge: } declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() - -!nvvm.annotations = !{!0} -!0 = !{ptr @hidden_diverge, !"kernel", i32 1} diff --git a/llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll b/llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll index cd729a918f814..e1ecc69871b98 100644 --- a/llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll +++ b/llvm/test/Analysis/UniformityAnalysis/NVPTX/irreducible.ll @@ -23,7 +23,7 @@ target triple = "nvptx64-nvidia-cuda" ; V ; if (i3 == 5) // divergent ; because sync dependent on (tid / i3). -define i32 @unstructured_loop(i1 %entry_cond) { +define ptx_kernel i32 @unstructured_loop(i1 %entry_cond) { ; CHECK-LABEL: for function 'unstructured_loop' entry: %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() @@ -59,5 +59,3 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() declare i32 @llvm.nvvm.read.ptx.sreg.tid.z() declare i32 @llvm.nvvm.read.ptx.sreg.laneid() -!nvvm.annotations = !{!0} -!0 = !{ptr @unstructured_loop, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/b52037.ll b/llvm/test/CodeGen/NVPTX/b52037.ll index 5d1c390909f6a..b6317dfb28597 100644 --- a/llvm/test/CodeGen/NVPTX/b52037.ll +++ b/llvm/test/CodeGen/NVPTX/b52037.ll @@ -39,7 +39,7 @@ declare %int3 @hoge(i32, i32, i32) local_unnamed_addr declare i64 @foo() local_unnamed_addr -define void @barney(ptr nocapture readonly %arg) local_unnamed_addr { +define ptx_kernel void @barney(ptr nocapture readonly %arg) local_unnamed_addr { bb: tail call void asm sideeffect "// KEEP", ""() #1 %tmp = alloca %struct.zot, align 16 @@ -210,9 +210,6 @@ bb14: ; preds = %bb49.i.lr.ph, %bb49 attributes #0 = { argmemonly mustprogress nofree nounwind willreturn } attributes #1 = { nounwind } -!nvvm.annotations = !{!0} - -!0 = !{ptr @barney, !"kernel", i32 1} !1 = !{!2, !11, i64 64} !2 = !{!"_ZTSN7cuneibs22neiblist_iterator_coreE", !3, i64 0, !3, i64 8, !6, i64 16, !8, i64 32, !9, i64 44, !10, i64 48, !11, i64 64, !9, i64 72, !4, i64 76, !9, i64 80} !3 = !{!"any pointer", !4, i64 0} diff --git a/llvm/test/CodeGen/NVPTX/bug21465.ll b/llvm/test/CodeGen/NVPTX/bug21465.ll index 9b1f1049c6487..76300e3cfdc5b 100644 --- a/llvm/test/CodeGen/NVPTX/bug21465.ll +++ b/llvm/test/CodeGen/NVPTX/bug21465.ll @@ -8,7 +8,7 @@ target triple = "nvptx64-unknown-unknown" %struct.S = type { i32, i32 } ; Function Attrs: nounwind -define void @_Z11TakesStruct1SPi(ptr byval(%struct.S) nocapture readonly %input, ptr nocapture %output) #0 { +define ptx_kernel void @_Z11TakesStruct1SPi(ptr byval(%struct.S) nocapture readonly %input, ptr nocapture %output) #0 { entry: ; CHECK-LABEL: @_Z11TakesStruct1SPi ; PTX-LABEL: .visible .entry _Z11TakesStruct1SPi( @@ -23,7 +23,3 @@ entry: } attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!nvvm.annotations = !{!0} - -!0 = !{ptr @_Z11TakesStruct1SPi, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/bug22322.ll b/llvm/test/CodeGen/NVPTX/bug22322.ll index e3656fd16b215..ace31667184b0 100644 --- a/llvm/test/CodeGen/NVPTX/bug22322.ll +++ b/llvm/test/CodeGen/NVPTX/bug22322.ll @@ -8,7 +8,7 @@ target triple = "nvptx64-nvidia-cuda" ; Function Attrs: nounwind ; CHECK-LABEL: some_kernel -define void @some_kernel(ptr nocapture %dst) #0 { +define ptx_kernel void @some_kernel(ptr nocapture %dst) #0 { _ZL11compute_vecRK6float3jb.exit: %ret_vec.sroa.8.i = alloca float, align 4 %0 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() @@ -55,8 +55,5 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "n attributes #1 = { nounwind readnone } attributes #2 = { nounwind } -!nvvm.annotations = !{!0} !llvm.ident = !{!1} - -!0 = !{ptr @some_kernel, !"kernel", i32 1} !1 = !{!"clang version 3.5.1 (tags/RELEASE_351/final)"} diff --git a/llvm/test/CodeGen/NVPTX/bug26185.ll b/llvm/test/CodeGen/NVPTX/bug26185.ll index 00c97fb381e0e..193df7f86ca72 100644 --- a/llvm/test/CodeGen/NVPTX/bug26185.ll +++ b/llvm/test/CodeGen/NVPTX/bug26185.ll @@ -8,7 +8,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-unknown-unknown" ; CHECK-LABEL: ex_zext -define void @ex_zext(ptr noalias readonly %data, ptr %res) { +define ptx_kernel void @ex_zext(ptr noalias readonly %data, ptr %res) { entry: ; CHECK: ld.global.nc.u8 %val = load i8, ptr %data @@ -19,7 +19,7 @@ entry: } ; CHECK-LABEL: ex_sext -define void @ex_sext(ptr noalias readonly %data, ptr %res) { +define ptx_kernel void @ex_sext(ptr noalias readonly %data, ptr %res) { entry: ; CHECK: ld.global.nc.u8 %val = load i8, ptr %data @@ -30,7 +30,7 @@ entry: } ; CHECK-LABEL: ex_zext_v2 -define void @ex_zext_v2(ptr noalias readonly %data, ptr %res) { +define ptx_kernel void @ex_zext_v2(ptr noalias readonly %data, ptr %res) { entry: ; CHECK: ld.global.nc.v2.u8 %val = load <2 x i8>, ptr %data @@ -41,7 +41,7 @@ entry: } ; CHECK-LABEL: ex_sext_v2 -define void @ex_sext_v2(ptr noalias readonly %data, ptr %res) { +define ptx_kernel void @ex_sext_v2(ptr noalias readonly %data, ptr %res) { entry: ; CHECK: ld.global.nc.v2.u8 %val = load <2 x i8>, ptr %data @@ -51,8 +51,3 @@ entry: ret void } -!nvvm.annotations = !{!0,!1,!2,!3} -!0 = !{ptr @ex_zext, !"kernel", i32 1} -!1 = !{ptr @ex_sext, !"kernel", i32 1} -!2 = !{ptr @ex_zext_v2, !"kernel", i32 1} -!3 = !{ptr @ex_sext_v2, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll index 19f4ef8ec77b9..1c9d271902fd3 100644 --- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll +++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll @@ -16,7 +16,7 @@ ; } ; CHECK: .visible .entry kernel_func -define void @kernel_func(ptr %a) { +define ptx_kernel void @kernel_func(ptr %a) { entry: %buf = alloca [16 x i8], align 4 @@ -56,7 +56,3 @@ entry: } declare void @callee(ptr, ptr) - -!nvvm.annotations = !{!0} - -!0 = !{ptr @kernel_func, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/cluster-dim.ll b/llvm/test/CodeGen/NVPTX/cluster-dim.ll index c9258addbe04d..9275c895b224a 100644 --- a/llvm/test/CodeGen/NVPTX/cluster-dim.ll +++ b/llvm/test/CodeGen/NVPTX/cluster-dim.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90 | FileCheck -check-prefixes=CHECK90 %s ; RUN: %if ptxas-12.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_90 | %ptxas-verify -arch=sm_90 %} -define void @kernel_func_clusterxyz() { +define ptx_kernel void @kernel_func_clusterxyz() { ; CHECK80-LABEL: kernel_func_clusterxyz( ; CHECK80: { ; CHECK80-EMPTY: @@ -23,7 +23,6 @@ define void @kernel_func_clusterxyz() { } -!nvvm.annotations = !{!1, !2} +!nvvm.annotations = !{!1} -!1 = !{ptr @kernel_func_clusterxyz, !"kernel", i32 1} -!2 = !{ptr @kernel_func_clusterxyz, !"cluster_dim_x", i32 3, !"cluster_dim_y", i32 5, !"cluster_dim_z", i32 7} +!1 = !{ptr @kernel_func_clusterxyz, !"cluster_dim_x", i32 3, !"cluster_dim_y", i32 5, !"cluster_dim_z", i32 7} diff --git a/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll b/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll index 43e4dfca1456d..2b6631154e387 100644 --- a/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll +++ b/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll @@ -12,7 +12,7 @@ target triple = "nvptx-nvidia-cuda" @myconst = internal constant i32 420, align 4 -define void @foo(ptr %a, ptr %b) { +define ptx_kernel void @foo(ptr %a, ptr %b) { ; Expect one load -- @myconst isn't loaded from, because we know its value ; statically. ; CHECK: ld.global.u32 @@ -24,7 +24,3 @@ define void @foo(ptr %a, ptr %b) { store i32 %ld2, ptr %b ret void } - - -!nvvm.annotations = !{!0} -!0 = !{ptr @foo, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/i1-array-global.ll b/llvm/test/CodeGen/NVPTX/i1-array-global.ll index ff3848b6f8f75..20b376f94c0d9 100644 --- a/llvm/test/CodeGen/NVPTX/i1-array-global.ll +++ b/llvm/test/CodeGen/NVPTX/i1-array-global.ll @@ -7,13 +7,9 @@ target triple = "nvptx-nvidia-cuda" @global_cst = private constant [6 x i1] [i1 true, i1 false, i1 true, i1 false, i1 true, i1 false] ; CHECK: .global .align 1 .b8 global_cst[6] = {1, 0, 1, 0, 1} -define void @kernel(i32 %i, ptr %out) { +define ptx_kernel void @kernel(i32 %i, ptr %out) { %5 = getelementptr inbounds i1, ptr @global_cst, i32 %i %6 = load i1, ptr %5, align 1 store i1 %6, ptr %out, align 1 ret void } - -!nvvm.annotations = !{!0} -!0 = !{ptr @kernel, !"kernel", i32 1} - diff --git a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll index 83f8f80919f80..f5f1dd9fcf0ea 100644 --- a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll +++ b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll @@ -5,7 +5,7 @@ target triple = "nvptx-nvidia-cuda" -define void @foo(ptr noalias readonly %ptr, ptr noalias %retval) { +define ptx_kernel void @foo(ptr noalias readonly %ptr, ptr noalias %retval) { ; CHECK-LABEL: foo( ; CHECK: .reg .b16 %rs<2>; ; CHECK: .reg .b32 %r<4>; @@ -28,7 +28,3 @@ define void @foo(ptr noalias readonly %ptr, ptr noalias %retval) { store i32 %and, ptr %retval ret void } - -!nvvm.annotations = !{!0} - -!0 = !{ptr @foo, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/i1-global.ll b/llvm/test/CodeGen/NVPTX/i1-global.ll index 17af1fa29e6c2..60d2ccd464194 100644 --- a/llvm/test/CodeGen/NVPTX/i1-global.ll +++ b/llvm/test/CodeGen/NVPTX/i1-global.ll @@ -8,13 +8,9 @@ target triple = "nvptx-nvidia-cuda" @mypred = addrspace(1) global i1 true, align 1 -define void @foo(i1 %p, ptr %out) { +define ptx_kernel void @foo(i1 %p, ptr %out) { %ld = load i1, ptr addrspace(1) @mypred %val = zext i1 %ld to i32 store i32 %val, ptr %out ret void } - - -!nvvm.annotations = !{!0} -!0 = !{ptr @foo, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/i1-param.ll b/llvm/test/CodeGen/NVPTX/i1-param.ll index 3c74ee6aaa3b5..14d417bca459d 100644 --- a/llvm/test/CodeGen/NVPTX/i1-param.ll +++ b/llvm/test/CodeGen/NVPTX/i1-param.ll @@ -9,12 +9,8 @@ target triple = "nvptx-nvidia-cuda" ; CHECK: .entry foo ; CHECK: .param .u8 foo_param_0 ; CHECK: .param .u64 .ptr .align 1 foo_param_1 -define void @foo(i1 %p, ptr %out) { +define ptx_kernel void @foo(i1 %p, ptr %out) { %val = zext i1 %p to i32 store i32 %val, ptr %out ret void } - - -!nvvm.annotations = !{!0} -!0 = !{ptr @foo, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/intr-range.ll b/llvm/test/CodeGen/NVPTX/intr-range.ll index 2f3e08a039f52..86776ab09efc6 100644 --- a/llvm/test/CodeGen/NVPTX/intr-range.ll +++ b/llvm/test/CodeGen/NVPTX/intr-range.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --version 5 ; RUN: opt < %s -S -mtriple=nvptx-nvidia-cuda -mcpu=sm_20 -passes=nvvm-intr-range | FileCheck %s -define i32 @test_maxntid() { -; CHECK-LABEL: define i32 @test_maxntid( +define ptx_kernel i32 @test_maxntid() { +; CHECK-LABEL: define ptx_kernel i32 @test_maxntid( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 96) i32 @llvm.nvvm.read.ptx.sreg.tid.x() ; CHECK-NEXT: [[TMP3:%.*]] = call range(i32 0, 96) i32 @llvm.nvvm.read.ptx.sreg.tid.y() @@ -31,8 +31,8 @@ define i32 @test_maxntid() { ret i32 %11 } -define i32 @test_reqntid() { -; CHECK-LABEL: define i32 @test_reqntid( +define ptx_kernel i32 @test_reqntid() { +; CHECK-LABEL: define ptx_kernel i32 @test_reqntid( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.x() ; CHECK-NEXT: [[TMP5:%.*]] = call range(i32 0, 20) i32 @llvm.nvvm.read.ptx.sreg.tid.y() @@ -64,8 +64,8 @@ define i32 @test_reqntid() { ;; A case like this could occur if a function with the sreg intrinsic was ;; inlined into a kernel where the tid metadata is present, ensure the range is ;; updated. -define i32 @test_inlined() { -; CHECK-LABEL: define i32 @test_inlined( +define ptx_kernel i32 @test_inlined() { +; CHECK-LABEL: define ptx_kernel i32 @test_inlined( ; CHECK-SAME: ) #[[ATTR0]] { ; CHECK-NEXT: [[TMP1:%.*]] = call range(i32 0, 4) i32 @llvm.nvvm.read.ptx.sreg.tid.x() ; CHECK-NEXT: ret i32 [[TMP1]] @@ -83,6 +83,6 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y() declare i32 @llvm.nvvm.read.ptx.sreg.ntid.z() !nvvm.annotations = !{!0, !1, !2} -!0 = !{ptr @test_maxntid, !"kernel", i32 1, !"maxntidx", i32 32, !"maxntidz", i32 3} -!1 = !{ptr @test_reqntid, !"kernel", i32 1, !"reqntidx", i32 20} -!2 = !{ptr @test_inlined, !"kernel", i32 1, !"maxntidx", i32 4} +!0 = !{ptr @test_maxntid, !"maxntidx", i32 32, !"maxntidz", i32 3} +!1 = !{ptr @test_reqntid, !"reqntidx", i32 20} +!2 = !{ptr @test_inlined, !"maxntidx", i32 4} diff --git a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll index 93d428d6fe6f4..2889d2d89a857 100644 --- a/llvm/test/CodeGen/NVPTX/kernel-param-align.ll +++ b/llvm/test/CodeGen/NVPTX/kernel-param-align.ll @@ -10,7 +10,7 @@ ; CHECK: .param .u64 .ptr .shared .align 8 func_align_param_3 ; CHECK: .param .u64 .ptr .const .align 16 func_align_param_4 ; CHECK: .param .u64 .ptr .local .align 32 func_align_param_5 -define void @func_align(ptr nocapture readonly align 1 %input, +define ptx_kernel void @func_align(ptr nocapture readonly align 1 %input, ptr nocapture align 2 %out, ptr addrspace(1) align 4 %global, ptr addrspace(3) align 8 %shared, @@ -27,7 +27,7 @@ entry: ; CHECK: .param .u64 .ptr .shared .align 1 func_noalign_param_3 ; CHECK: .param .u64 .ptr .const .align 1 func_noalign_param_4 ; CHECK: .param .u64 .ptr .local .align 1 func_noalign_param_5 -define void @func_noalign(ptr nocapture readonly %input, +define ptx_kernel void @func_noalign(ptr nocapture readonly %input, ptr nocapture %out, ptr addrspace(1) %global, ptr addrspace(3) %shared, @@ -36,7 +36,3 @@ define void @func_noalign(ptr nocapture readonly %input, entry: ret void } - -!nvvm.annotations = !{!0, !1} -!0 = !{ptr @func_align, !"kernel", i32 1} -!1 = !{ptr @func_noalign, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll index bdaeccd53fac9..dc1917f3b1507 100644 --- a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll +++ b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll @@ -10,7 +10,7 @@ target triple = "nvptx64-unknown-unknown" ; SM20: ld.global.f32 ; SM35-LABEL: .visible .entry foo1( ; SM35: ld.global.nc.f32 -define void @foo1(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo1(ptr noalias readonly %from, ptr %to) { %1 = load float, ptr %from store float %1, ptr %to ret void @@ -20,7 +20,7 @@ define void @foo1(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.f64 ; SM35-LABEL: .visible .entry foo2( ; SM35: ld.global.nc.f64 -define void @foo2(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo2(ptr noalias readonly %from, ptr %to) { %1 = load double, ptr %from store double %1, ptr %to ret void @@ -30,7 +30,7 @@ define void @foo2(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.u16 ; SM35-LABEL: .visible .entry foo3( ; SM35: ld.global.nc.u16 -define void @foo3(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo3(ptr noalias readonly %from, ptr %to) { %1 = load i16, ptr %from store i16 %1, ptr %to ret void @@ -40,7 +40,7 @@ define void @foo3(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.u32 ; SM35-LABEL: .visible .entry foo4( ; SM35: ld.global.nc.u32 -define void @foo4(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo4(ptr noalias readonly %from, ptr %to) { %1 = load i32, ptr %from store i32 %1, ptr %to ret void @@ -50,7 +50,7 @@ define void @foo4(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.u64 ; SM35-LABEL: .visible .entry foo5( ; SM35: ld.global.nc.u64 -define void @foo5(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo5(ptr noalias readonly %from, ptr %to) { %1 = load i64, ptr %from store i64 %1, ptr %to ret void @@ -63,7 +63,7 @@ define void @foo5(ptr noalias readonly %from, ptr %to) { ; SM35-LABEL: .visible .entry foo6( ; SM35: ld.global.nc.u64 ; SM35: ld.global.nc.u64 -define void @foo6(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo6(ptr noalias readonly %from, ptr %to) { %1 = load i128, ptr %from store i128 %1, ptr %to ret void @@ -73,7 +73,7 @@ define void @foo6(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.v2.u8 ; SM35-LABEL: .visible .entry foo7( ; SM35: ld.global.nc.v2.u8 -define void @foo7(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo7(ptr noalias readonly %from, ptr %to) { %1 = load <2 x i8>, ptr %from store <2 x i8> %1, ptr %to ret void @@ -83,7 +83,7 @@ define void @foo7(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.u32 ; SM35-LABEL: .visible .entry foo8( ; SM35: ld.global.nc.u32 -define void @foo8(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo8(ptr noalias readonly %from, ptr %to) { %1 = load <2 x i16>, ptr %from store <2 x i16> %1, ptr %to ret void @@ -93,7 +93,7 @@ define void @foo8(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.v2.u32 ; SM35-LABEL: .visible .entry foo9( ; SM35: ld.global.nc.v2.u32 -define void @foo9(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo9(ptr noalias readonly %from, ptr %to) { %1 = load <2 x i32>, ptr %from store <2 x i32> %1, ptr %to ret void @@ -103,7 +103,7 @@ define void @foo9(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.v2.u64 ; SM35-LABEL: .visible .entry foo10( ; SM35: ld.global.nc.v2.u64 -define void @foo10(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo10(ptr noalias readonly %from, ptr %to) { %1 = load <2 x i64>, ptr %from store <2 x i64> %1, ptr %to ret void @@ -113,7 +113,7 @@ define void @foo10(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.v2.f32 ; SM35-LABEL: .visible .entry foo11( ; SM35: ld.global.nc.v2.f32 -define void @foo11(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo11(ptr noalias readonly %from, ptr %to) { %1 = load <2 x float>, ptr %from store <2 x float> %1, ptr %to ret void @@ -123,7 +123,7 @@ define void @foo11(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.v2.f64 ; SM35-LABEL: .visible .entry foo12( ; SM35: ld.global.nc.v2.f64 -define void @foo12(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo12(ptr noalias readonly %from, ptr %to) { %1 = load <2 x double>, ptr %from store <2 x double> %1, ptr %to ret void @@ -133,7 +133,7 @@ define void @foo12(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.u32 ; SM35-LABEL: .visible .entry foo13( ; SM35: ld.global.nc.u32 -define void @foo13(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo13(ptr noalias readonly %from, ptr %to) { %1 = load <4 x i8>, ptr %from store <4 x i8> %1, ptr %to ret void @@ -143,7 +143,7 @@ define void @foo13(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.v4.u16 ; SM35-LABEL: .visible .entry foo14( ; SM35: ld.global.nc.v4.u16 -define void @foo14(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo14(ptr noalias readonly %from, ptr %to) { %1 = load <4 x i16>, ptr %from store <4 x i16> %1, ptr %to ret void @@ -153,7 +153,7 @@ define void @foo14(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.v4.u32 ; SM35-LABEL: .visible .entry foo15( ; SM35: ld.global.nc.v4.u32 -define void @foo15(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo15(ptr noalias readonly %from, ptr %to) { %1 = load <4 x i32>, ptr %from store <4 x i32> %1, ptr %to ret void @@ -163,7 +163,7 @@ define void @foo15(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.v4.f32 ; SM35-LABEL: .visible .entry foo16( ; SM35: ld.global.nc.v4.f32 -define void @foo16(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo16(ptr noalias readonly %from, ptr %to) { %1 = load <4 x float>, ptr %from store <4 x float> %1, ptr %to ret void @@ -175,7 +175,7 @@ define void @foo16(ptr noalias readonly %from, ptr %to) { ; SM35-LABEL: .visible .entry foo17( ; SM35: ld.global.nc.v2.f64 ; SM35: ld.global.nc.v2.f64 -define void @foo17(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo17(ptr noalias readonly %from, ptr %to) { %1 = load <4 x double>, ptr %from store <4 x double> %1, ptr %to ret void @@ -185,7 +185,7 @@ define void @foo17(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.u64 ; SM35-LABEL: .visible .entry foo18( ; SM35: ld.global.nc.u64 -define void @foo18(ptr noalias readonly %from, ptr %to) { +define ptx_kernel void @foo18(ptr noalias readonly %from, ptr %to) { %1 = load ptr, ptr %from store ptr %1, ptr %to ret void @@ -196,7 +196,7 @@ define void @foo18(ptr noalias readonly %from, ptr %to) { ; SM20: ld.global.f32 ; SM35-LABEL: .visible .entry foo19( ; SM35: ld.global.nc.f32 -define void @foo19(ptr noalias readonly %from, ptr %to, i32 %n) { +define ptx_kernel void @foo19(ptr noalias readonly %from, ptr %to, i32 %n) { entry: br label %loop @@ -243,24 +243,3 @@ define void @notkernel2(ptr addrspace(1) noalias readonly %from, ptr %to) { store float %1, ptr %to ret void } - -!nvvm.annotations = !{!1 ,!2 ,!3 ,!4 ,!5 ,!6, !7 ,!8 ,!9 ,!10 ,!11 ,!12, !13, !14, !15, !16, !17, !18, !19} -!1 = !{ptr @foo1, !"kernel", i32 1} -!2 = !{ptr @foo2, !"kernel", i32 1} -!3 = !{ptr @foo3, !"kernel", i32 1} -!4 = !{ptr @foo4, !"kernel", i32 1} -!5 = !{ptr @foo5, !"kernel", i32 1} -!6 = !{ptr @foo6, !"kernel", i32 1} -!7 = !{ptr @foo7, !"kernel", i32 1} -!8 = !{ptr @foo8, !"kernel", i32 1} -!9 = !{ptr @foo9, !"kernel", i32 1} -!10 = !{ptr @foo10, !"kernel", i32 1} -!11 = !{ptr @foo11, !"kernel", i32 1} -!12 = !{ptr @foo12, !"kernel", i32 1} -!13 = !{ptr @foo13, !"kernel", i32 1} -!14 = !{ptr @foo14, !"kernel", i32 1} -!15 = !{ptr @foo15, !"kernel", i32 1} -!16 = !{ptr @foo16, !"kernel", i32 1} -!17 = !{ptr @foo17, !"kernel", i32 1} -!18 = !{ptr @foo18, !"kernel", i32 1} -!19 = !{ptr @foo19, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll index e42f2303cdf7c..f21ff974a2c6b 100644 --- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll +++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll @@ -29,7 +29,7 @@ define void @foo(i32 %a) { ; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo2_param_0]; ; PTX64: add.u64 %rd[[SP_REG:[0-9]+]], %SPL, 0; ; PTX64: st.local.u32 [%rd[[SP_REG]]], %r{{[0-9]+}}; -define void @foo2(i32 %a) { +define ptx_kernel void @foo2(i32 %a) { %local = alloca i32, align 4 store i32 %a, ptr %local call void @bar(ptr %local) @@ -38,8 +38,6 @@ define void @foo2(i32 %a) { declare void @bar(ptr %a) -!nvvm.annotations = !{!0} -!0 = !{ptr @foo2, !"kernel", i32 1} ; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}}; ; PTX32-NOT: cvta.local.u32 %SP, %SPL; diff --git a/llvm/test/CodeGen/NVPTX/lower-alloca.ll b/llvm/test/CodeGen/NVPTX/lower-alloca.ll index 8f2d55151b311..530b48b3d3e37 100644 --- a/llvm/test/CodeGen/NVPTX/lower-alloca.ll +++ b/llvm/test/CodeGen/NVPTX/lower-alloca.ll @@ -6,7 +6,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx64-unknown-unknown" -define void @kernel() { +define ptx_kernel void @kernel() { ; LABEL: @lower_alloca ; PTX-LABEL: .visible .entry kernel( %A = alloca i32 @@ -37,7 +37,5 @@ define void @alloca_in_explicit_local_as() { declare void @callee(ptr) declare void @callee_addrspace5(ptr addrspace(5)) -!nvvm.annotations = !{!0} !nvvm.annotations = !{!1} -!0 = !{ptr @kernel, !"kernel", i32 1} !1 = !{ptr @alloca_in_explicit_local_as, !"alloca_in_explicit_local_as", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll index 9cfe9192772b8..27cf8ca5b61d6 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll @@ -68,7 +68,7 @@ entry: ret i32 %0, !dbg !23 } -define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %out, i32 %n) { +define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %out, i32 %n) { ; PTX-LABEL: grid_const_int( ; PTX: { ; PTX-NEXT: .reg .b32 %r<4>; @@ -82,7 +82,7 @@ define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %ou ; PTX-NEXT: add.s32 %r3, %r2, %r1; ; PTX-NEXT: st.global.u32 [%rd2], %r3; ; PTX-NEXT: ret; -; OPT-LABEL: define void @grid_const_int( +; OPT-LABEL: define ptx_kernel void @grid_const_int( ; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[OUT2:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) ; OPT-NEXT: [[OUT3:%.*]] = addrspacecast ptr addrspace(1) [[OUT2]] to ptr @@ -91,6 +91,7 @@ define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %ou ; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]] ; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4 ; OPT-NEXT: ret void +; %tmp = load i32, ptr %input1, align 4 %add = add i32 %tmp, %input2 store i32 %add, ptr %out @@ -99,7 +100,7 @@ define void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %input2, ptr %ou %struct.s = type { i32, i32 } -define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){ +define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){ ; PTX-LABEL: grid_const_struct( ; PTX: { ; PTX-NEXT: .reg .b32 %r<4>; @@ -113,7 +114,7 @@ define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){ ; PTX-NEXT: add.s32 %r3, %r1, %r2; ; PTX-NEXT: st.global.u32 [%rd2], %r3; ; PTX-NEXT: ret; -; OPT-LABEL: define void @grid_const_struct( +; OPT-LABEL: define ptx_kernel void @grid_const_struct( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[OUT4:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) ; OPT-NEXT: [[OUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUT4]] to ptr @@ -125,6 +126,7 @@ define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){ ; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]] ; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4 ; OPT-NEXT: ret void +; %gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0 %gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1 %int1 = load i32, ptr %gep1 @@ -134,7 +136,7 @@ define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){ ret void } -define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) { +define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) { ; PTX-LABEL: grid_const_escape( ; PTX: { ; PTX-NEXT: .reg .b32 %r<3>; @@ -159,17 +161,18 @@ define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) { ; PTX-NEXT: ld.param.b32 %r1, [retval0]; ; PTX-NEXT: } // callseq 0 ; PTX-NEXT: ret; -; OPT-LABEL: define void @grid_const_escape( +; OPT-LABEL: define ptx_kernel void @grid_const_escape( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) ; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]]) ; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]]) ; OPT-NEXT: ret void +; %call = call i32 @escape(ptr %input) ret void } -define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 %a, ptr byval(i32) align 4 %b) { +define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 %a, ptr byval(i32) align 4 %b) { ; PTX-LABEL: multiple_grid_const_escape( ; PTX: { ; PTX-NEXT: .local .align 4 .b8 __local_depot4[4]; @@ -212,7 +215,7 @@ define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 ; PTX-NEXT: ld.param.b32 %r2, [retval0]; ; PTX-NEXT: } // callseq 1 ; PTX-NEXT: ret; -; OPT-LABEL: define void @multiple_grid_const_escape( +; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[B_PARAM:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101) ; OPT-NEXT: [[B_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[B_PARAM]]) @@ -222,13 +225,14 @@ define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 ; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; OPT-NEXT: [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr [[A_ADDR]], ptr [[B_PARAM_GEN]]) ; OPT-NEXT: ret void +; %a.addr = alloca i32, align 4 store i32 %a, ptr %a.addr, align 4 %call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b) ret void } -define void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %addr) { +define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr %addr) { ; PTX-LABEL: grid_const_memory_escape( ; PTX: { ; PTX-NEXT: .reg .b64 %rd<6>; @@ -241,7 +245,7 @@ define void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr % ; PTX-NEXT: cvta.param.u64 %rd5, %rd4; ; PTX-NEXT: st.global.u64 [%rd3], %rd5; ; PTX-NEXT: ret; -; OPT-LABEL: define void @grid_const_memory_escape( +; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[ADDR4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1) ; OPT-NEXT: [[ADDR5:%.*]] = addrspacecast ptr addrspace(1) [[ADDR4]] to ptr @@ -249,11 +253,12 @@ define void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %input, ptr % ; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]]) ; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8 ; OPT-NEXT: ret void +; store ptr %input, ptr %addr, align 8 ret void } -define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, ptr %result) { +define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, ptr %result) { ; PTX-LABEL: grid_const_inlineasm_escape( ; PTX: { ; PTX-NEXT: .reg .b64 %rd<8>; @@ -271,7 +276,7 @@ define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, pt ; PTX-NEXT: st.global.u64 [%rd6], %rd1; ; PTX-NEXT: ret; ; PTX-NOT .local -; OPT-LABEL: define void @grid_const_inlineasm_escape( +; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[RESULT4:%.*]] = addrspacecast ptr [[RESULT]] to ptr addrspace(1) ; OPT-NEXT: [[RESULT5:%.*]] = addrspacecast ptr addrspace(1) [[RESULT4]] to ptr @@ -282,6 +287,7 @@ define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, pt ; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2 ; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8 ; OPT-NEXT: ret void +; %tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0 %tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1 %1 = call i64 asm "add.s64 $0, $1, $2;", "=l,l,l"(ptr %tmpptr1, ptr %tmpptr2) #1 @@ -289,7 +295,7 @@ define void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 %input, pt ret void } -define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) { +define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) { ; PTX-LABEL: grid_const_partial_escape( ; PTX: { ; PTX-NEXT: .reg .b32 %r<5>; @@ -319,7 +325,7 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) { ; PTX-NEXT: ld.param.b32 %r3, [retval0]; ; PTX-NEXT: } // callseq 2 ; PTX-NEXT: ret; -; OPT-LABEL: define void @grid_const_partial_escape( +; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape( ; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1) ; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr @@ -330,6 +336,7 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) { ; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4 ; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]]) ; OPT-NEXT: ret void +; %val = load i32, ptr %input %twice = add i32 %val, %val store i32 %twice, ptr %output @@ -337,7 +344,7 @@ define void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) { ret void } -define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) { +define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) { ; PTX-LABEL: grid_const_partial_escapemem( ; PTX: { ; PTX-NEXT: .reg .b32 %r<6>; @@ -369,7 +376,7 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu ; PTX-NEXT: } // callseq 3 ; PTX-NEXT: st.param.b32 [func_retval0], %r3; ; PTX-NEXT: ret; -; OPT-LABEL: define i32 @grid_const_partial_escapemem( +; OPT-LABEL: define ptx_kernel i32 @grid_const_partial_escapemem( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1) ; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr @@ -383,6 +390,7 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu ; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]] ; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]]) ; OPT-NEXT: ret i32 [[ADD]] +; %ptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0 %val1 = load i32, ptr %ptr1 %ptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1 @@ -393,7 +401,7 @@ define i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %outpu ret i32 %add } -define void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr %inout) { +define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr %inout) { ; PTX-LABEL: grid_const_phi( ; PTX: { ; PTX-NEXT: .reg .pred %p<2>; @@ -415,7 +423,7 @@ define void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr %inout) { ; PTX-NEXT: ld.u32 %r2, [%rd8]; ; PTX-NEXT: st.global.u32 [%rd1], %r2; ; PTX-NEXT: ret; -; OPT-LABEL: define void @grid_const_phi( +; OPT-LABEL: define ptx_kernel void @grid_const_phi( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1) ; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr @@ -435,6 +443,7 @@ define void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr %inout) { ; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4 ; OPT-NEXT: ret void +; %val = load i32, ptr %inout %less = icmp slt i32 %val, 0 @@ -453,7 +462,7 @@ merge: } ; NOTE: %input2 is *not* grid_constant -define void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ptr byval(%struct.s) %input2, ptr %inout) { +define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ptr byval(%struct.s) %input2, ptr %inout) { ; PTX-LABEL: grid_const_phi_ngc( ; PTX: { ; PTX-NEXT: .reg .pred %p<2>; @@ -478,7 +487,7 @@ define void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ptr byval( ; PTX-NEXT: ld.u32 %r2, [%rd11]; ; PTX-NEXT: st.global.u32 [%rd1], %r2; ; PTX-NEXT: ret; -; OPT-LABEL: define void @grid_const_phi_ngc( +; OPT-LABEL: define ptx_kernel void @grid_const_phi_ngc( ; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1) ; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr @@ -500,6 +509,7 @@ define void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ptr byval( ; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4 ; OPT-NEXT: ret void +; %val = load i32, ptr %inout %less = icmp slt i32 %val, 0 br i1 %less, label %first, label %second @@ -517,7 +527,7 @@ merge: } ; NOTE: %input2 is *not* grid_constant -define void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %inout) { +define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %inout) { ; PTX-LABEL: grid_const_select( ; PTX: { ; PTX-NEXT: .reg .pred %p<2>; @@ -539,7 +549,7 @@ define void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %i ; PTX-NEXT: ld.u32 %r2, [%rd9]; ; PTX-NEXT: st.global.u32 [%rd3], %r2; ; PTX-NEXT: ret; -; OPT-LABEL: define void @grid_const_select( +; OPT-LABEL: define ptx_kernel void @grid_const_select( ; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1) ; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr @@ -553,6 +563,7 @@ define void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %i ; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4 ; OPT-NEXT: ret void +; %val = load i32, ptr %inout %less = icmp slt i32 %val, 0 %ptrnew = select i1 %less, ptr %input1, ptr %input2 @@ -561,7 +572,7 @@ define void @grid_const_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %i ret void } -define i32 @grid_const_ptrtoint(ptr byval(i32) %input) { +define ptx_kernel i32 @grid_const_ptrtoint(ptr byval(i32) %input) { ; PTX-LABEL: grid_const_ptrtoint( ; PTX: { ; PTX-NEXT: .reg .b32 %r<4>; @@ -576,7 +587,7 @@ define i32 @grid_const_ptrtoint(ptr byval(i32) %input) { ; PTX-NEXT: add.s32 %r3, %r1, %r2; ; PTX-NEXT: st.param.b32 [func_retval0], %r3; ; PTX-NEXT: ret; -; OPT-LABEL: define i32 @grid_const_ptrtoint( +; OPT-LABEL: define ptx_kernel i32 @grid_const_ptrtoint( ; OPT-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR0]] { ; OPT-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101) ; OPT-NEXT: [[INPUT3:%.*]] = load i32, ptr addrspace(101) [[INPUT2]], align 4 @@ -584,6 +595,7 @@ define i32 @grid_const_ptrtoint(ptr byval(i32) %input) { ; OPT-NEXT: [[PTRVAL:%.*]] = ptrtoint ptr [[INPUT1]] to i32 ; OPT-NEXT: [[KEEPALIVE:%.*]] = add i32 [[INPUT3]], [[PTRVAL]] ; OPT-NEXT: ret i32 [[KEEPALIVE]] +; %val = load i32, ptr %input %ptrval = ptrtoint ptr %input to i32 %keepalive = add i32 %val, %ptrval @@ -598,40 +610,40 @@ declare dso_local ptr @escape3(ptr, ptr, ptr) local_unnamed_addr !nvvm.annotations = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23} -!0 = !{ptr @grid_const_int, !"kernel", i32 1, !"grid_constant", !1} +!0 = !{ptr @grid_const_int, !"grid_constant", !1} !1 = !{i32 1} -!2 = !{ptr @grid_const_struct, !"kernel", i32 1, !"grid_constant", !3} +!2 = !{ptr @grid_const_struct, !"grid_constant", !3} !3 = !{i32 1} -!4 = !{ptr @grid_const_escape, !"kernel", i32 1, !"grid_constant", !5} +!4 = !{ptr @grid_const_escape, !"grid_constant", !5} !5 = !{i32 1} -!6 = !{ptr @multiple_grid_const_escape, !"kernel", i32 1, !"grid_constant", !7} +!6 = !{ptr @multiple_grid_const_escape, !"grid_constant", !7} !7 = !{i32 1, i32 3} -!8 = !{ptr @grid_const_memory_escape, !"kernel", i32 1, !"grid_constant", !9} +!8 = !{ptr @grid_const_memory_escape, !"grid_constant", !9} !9 = !{i32 1} -!10 = !{ptr @grid_const_inlineasm_escape, !"kernel", i32 1, !"grid_constant", !11} +!10 = !{ptr @grid_const_inlineasm_escape, !"grid_constant", !11} !11 = !{i32 1} -!12 = !{ptr @grid_const_partial_escape, !"kernel", i32 1, !"grid_constant", !13} +!12 = !{ptr @grid_const_partial_escape, !"grid_constant", !13} !13 = !{i32 1} -!14 = !{ptr @grid_const_partial_escapemem, !"kernel", i32 1, !"grid_constant", !15} +!14 = !{ptr @grid_const_partial_escapemem, !"grid_constant", !15} !15 = !{i32 1} -!16 = !{ptr @grid_const_phi, !"kernel", i32 1, !"grid_constant", !17} +!16 = !{ptr @grid_const_phi, !"grid_constant", !17} !17 = !{i32 1} -!18 = !{ptr @grid_const_phi_ngc, !"kernel", i32 1, !"grid_constant", !19} +!18 = !{ptr @grid_const_phi_ngc, !"grid_constant", !19} !19 = !{i32 1} -!20 = !{ptr @grid_const_select, !"kernel", i32 1, !"grid_constant", !21} +!20 = !{ptr @grid_const_select, !"grid_constant", !21} !21 = !{i32 1} -!22 = !{ptr @grid_const_ptrtoint, !"kernel", i32 1, !"grid_constant", !23} +!22 = !{ptr @grid_const_ptrtoint, !"grid_constant", !23} !23 = !{i32 1} diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll index eba4f273fa709..269bba75dc5fb 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args.ll @@ -65,7 +65,7 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) { } ; COMMON-LABEL: ptr_generic -define void @ptr_generic(ptr %out, ptr %in) { +define ptx_kernel void @ptr_generic(ptr %out, ptr %in) { ; IRC: %in3 = addrspacecast ptr %in to ptr addrspace(1) ; IRC: %in4 = addrspacecast ptr addrspace(1) %in3 to ptr ; IRC: %out1 = addrspacecast ptr %out to ptr addrspace(1) @@ -87,7 +87,7 @@ define void @ptr_generic(ptr %out, ptr %in) { } ; COMMON-LABEL: ptr_nongeneric -define void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(4) %in) { +define ptx_kernel void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(4) %in) { ; IR-NOT: addrspacecast ; PTX-NOT: cvta.to.global ; PTX: ld.const.u32 @@ -98,7 +98,7 @@ define void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(4) %in) { } ; COMMON-LABEL: ptr_as_int - define void @ptr_as_int(i64 noundef %i, i32 noundef %v) { + define ptx_kernel void @ptr_as_int(i64 noundef %i, i32 noundef %v) { ; IR: [[P:%.*]] = inttoptr i64 %i to ptr ; IRC: [[P1:%.*]] = addrspacecast ptr [[P]] to ptr addrspace(1) ; IRC: addrspacecast ptr addrspace(1) [[P1]] to ptr @@ -121,7 +121,7 @@ define void @ptr_nongeneric(ptr addrspace(1) %out, ptr addrspace(4) %in) { %struct.S = type { i64 } ; COMMON-LABEL: ptr_as_int_aggr -define void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%struct.S) align 8 %s, i32 noundef %v) { +define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%struct.S) align 8 %s, i32 noundef %v) { ; IR: [[S:%.*]] = addrspacecast ptr %s to ptr addrspace(101) ; IR: [[I:%.*]] = load i64, ptr addrspace(101) [[S]], align 8 ; IR: [[P0:%.*]] = inttoptr i64 [[I]] to ptr @@ -146,8 +146,3 @@ define void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%struct.S) ali ; Function Attrs: convergent nounwind declare dso_local ptr @escape(ptr) local_unnamed_addr -!nvvm.annotations = !{!0, !1, !2, !3} -!0 = !{ptr @ptr_generic, !"kernel", i32 1} -!1 = !{ptr @ptr_nongeneric, !"kernel", i32 1} -!2 = !{ptr @ptr_as_int, !"kernel", i32 1} -!3 = !{ptr @ptr_as_int_aggr, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll index 5c52626a711fe..26102722a483b 100644 --- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll @@ -24,8 +24,8 @@ declare void @llvm.memmove.p0.p0.i64(ptr nocapture writeonly, ptr nocapture read declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @read_only(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @read_only( +define dso_local ptx_kernel void @read_only(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @read_only( ; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; SM_60-NEXT: [[ENTRY:.*:]] ; SM_60-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) @@ -35,7 +35,7 @@ define dso_local void @read_only(ptr nocapture noundef writeonly %out, ptr nocap ; SM_60-NEXT: store i32 [[I]], ptr [[OUT2]], align 4 ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @read_only( +; SM_70-LABEL: define dso_local ptx_kernel void @read_only( ; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; SM_70-NEXT: [[ENTRY:.*:]] ; SM_70-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) @@ -45,7 +45,7 @@ define dso_local void @read_only(ptr nocapture noundef writeonly %out, ptr nocap ; SM_70-NEXT: store i32 [[I]], ptr [[OUT2]], align 4 ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @read_only( +; COPY-LABEL: define dso_local ptx_kernel void @read_only( ; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; COPY-NEXT: [[ENTRY:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 @@ -62,8 +62,8 @@ entry: } ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @read_only_gep(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @read_only_gep( +define dso_local ptx_kernel void @read_only_gep(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @read_only_gep( ; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_60-NEXT: [[ENTRY:.*:]] ; SM_60-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) @@ -74,7 +74,7 @@ define dso_local void @read_only_gep(ptr nocapture noundef writeonly %out, ptr n ; SM_60-NEXT: store i32 [[I]], ptr [[OUT2]], align 4 ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @read_only_gep( +; SM_70-LABEL: define dso_local ptx_kernel void @read_only_gep( ; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_70-NEXT: [[ENTRY:.*:]] ; SM_70-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) @@ -85,7 +85,7 @@ define dso_local void @read_only_gep(ptr nocapture noundef writeonly %out, ptr n ; SM_70-NEXT: store i32 [[I]], ptr [[OUT2]], align 4 ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @read_only_gep( +; COPY-LABEL: define dso_local ptx_kernel void @read_only_gep( ; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COPY-NEXT: [[ENTRY:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 @@ -104,8 +104,8 @@ entry: } ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @read_only_gep_asc(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @read_only_gep_asc( +define dso_local ptx_kernel void @read_only_gep_asc(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @read_only_gep_asc( ; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_60-NEXT: [[ENTRY:.*:]] ; SM_60-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) @@ -116,7 +116,7 @@ define dso_local void @read_only_gep_asc(ptr nocapture noundef writeonly %out, p ; SM_60-NEXT: store i32 [[I]], ptr [[OUT2]], align 4 ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @read_only_gep_asc( +; SM_70-LABEL: define dso_local ptx_kernel void @read_only_gep_asc( ; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_70-NEXT: [[ENTRY:.*:]] ; SM_70-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) @@ -127,7 +127,7 @@ define dso_local void @read_only_gep_asc(ptr nocapture noundef writeonly %out, p ; SM_70-NEXT: store i32 [[I]], ptr [[OUT2]], align 4 ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @read_only_gep_asc( +; COPY-LABEL: define dso_local ptx_kernel void @read_only_gep_asc( ; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COPY-NEXT: [[ENTRY:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 @@ -148,8 +148,8 @@ entry: } ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @read_only_gep_asc0(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @read_only_gep_asc0( +define dso_local ptx_kernel void @read_only_gep_asc0(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0( ; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_60-NEXT: [[ENTRY:.*:]] ; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -164,7 +164,7 @@ define dso_local void @read_only_gep_asc0(ptr nocapture noundef writeonly %out, ; SM_60-NEXT: store i32 [[I]], ptr [[OUT2]], align 4 ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @read_only_gep_asc0( +; SM_70-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0( ; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_70-NEXT: [[ENTRY:.*:]] ; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -179,7 +179,7 @@ define dso_local void @read_only_gep_asc0(ptr nocapture noundef writeonly %out, ; SM_70-NEXT: store i32 [[I]], ptr [[OUT2]], align 4 ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @read_only_gep_asc0( +; COPY-LABEL: define dso_local ptx_kernel void @read_only_gep_asc0( ; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COPY-NEXT: [[ENTRY:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 @@ -202,8 +202,8 @@ entry: } ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @escape_ptr(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @escape_ptr( +define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptr( ; SM_60-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_60-NEXT: [[ENTRY:.*:]] ; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -214,7 +214,7 @@ define dso_local void @escape_ptr(ptr nocapture noundef readnone %out, ptr nound ; SM_60-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S3]]) ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @escape_ptr( +; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptr( ; SM_70-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_70-NEXT: [[ENTRY:.*:]] ; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -225,7 +225,7 @@ define dso_local void @escape_ptr(ptr nocapture noundef readnone %out, ptr nound ; SM_70-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S3]]) ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @escape_ptr( +; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr( ; COPY-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COPY-NEXT: [[ENTRY:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 @@ -240,8 +240,8 @@ entry: } ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @escape_ptr_gep( +define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptr_gep( ; SM_60-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_60-NEXT: [[ENTRY:.*:]] ; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -253,7 +253,7 @@ define dso_local void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr n ; SM_60-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[B]]) ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @escape_ptr_gep( +; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptr_gep( ; SM_70-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_70-NEXT: [[ENTRY:.*:]] ; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -265,7 +265,7 @@ define dso_local void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr n ; SM_70-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[B]]) ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @escape_ptr_gep( +; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr_gep( ; COPY-SAME: ptr nocapture noundef readnone [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COPY-NEXT: [[ENTRY:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 @@ -282,8 +282,8 @@ entry: } ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @escape_ptr_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @escape_ptr_store( +define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptr_store( ; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_60-NEXT: [[ENTRY:.*:]] ; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -294,7 +294,7 @@ define dso_local void @escape_ptr_store(ptr nocapture noundef writeonly %out, pt ; SM_60-NEXT: store ptr [[S3]], ptr [[OUT2]], align 8 ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @escape_ptr_store( +; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptr_store( ; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_70-NEXT: [[ENTRY:.*:]] ; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -305,7 +305,7 @@ define dso_local void @escape_ptr_store(ptr nocapture noundef writeonly %out, pt ; SM_70-NEXT: store ptr [[S3]], ptr [[OUT2]], align 8 ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @escape_ptr_store( +; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr_store( ; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COPY-NEXT: [[ENTRY:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 @@ -320,8 +320,8 @@ entry: } ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @escape_ptr_gep_store( +define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptr_gep_store( ; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_60-NEXT: [[ENTRY:.*:]] ; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -333,7 +333,7 @@ define dso_local void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out ; SM_60-NEXT: store ptr [[B]], ptr [[OUT2]], align 8 ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @escape_ptr_gep_store( +; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptr_gep_store( ; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_70-NEXT: [[ENTRY:.*:]] ; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -345,7 +345,7 @@ define dso_local void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out ; SM_70-NEXT: store ptr [[B]], ptr [[OUT2]], align 8 ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @escape_ptr_gep_store( +; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr_gep_store( ; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COPY-NEXT: [[ENTRY:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 @@ -362,8 +362,8 @@ entry: } ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @escape_ptrtoint( +define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @escape_ptrtoint( ; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_60-NEXT: [[ENTRY:.*:]] ; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -375,7 +375,7 @@ define dso_local void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr ; SM_60-NEXT: store i64 [[I]], ptr [[OUT2]], align 8 ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @escape_ptrtoint( +; SM_70-LABEL: define dso_local ptx_kernel void @escape_ptrtoint( ; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_70-NEXT: [[ENTRY:.*:]] ; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -387,7 +387,7 @@ define dso_local void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr ; SM_70-NEXT: store i64 [[I]], ptr [[OUT2]], align 8 ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @escape_ptrtoint( +; COPY-LABEL: define dso_local ptx_kernel void @escape_ptrtoint( ; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COPY-NEXT: [[ENTRY:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 @@ -404,8 +404,8 @@ entry: } ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @memcpy_from_param(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @memcpy_from_param( +define dso_local ptx_kernel void @memcpy_from_param(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @memcpy_from_param( ; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_60-NEXT: [[ENTRY:.*:]] ; SM_60-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) @@ -414,7 +414,7 @@ define dso_local void @memcpy_from_param(ptr nocapture noundef writeonly %out, p ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true) ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @memcpy_from_param( +; SM_70-LABEL: define dso_local ptx_kernel void @memcpy_from_param( ; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_70-NEXT: [[ENTRY:.*:]] ; SM_70-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) @@ -423,7 +423,7 @@ define dso_local void @memcpy_from_param(ptr nocapture noundef writeonly %out, p ; SM_70-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true) ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @memcpy_from_param( +; COPY-LABEL: define dso_local ptx_kernel void @memcpy_from_param( ; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COPY-NEXT: [[ENTRY:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 @@ -438,8 +438,8 @@ entry: } ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @memcpy_from_param_noalign (ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) %s) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @memcpy_from_param_noalign( +define dso_local ptx_kernel void @memcpy_from_param_noalign (ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) %s) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @memcpy_from_param_noalign( ; SM_60-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_60-NEXT: [[ENTRY:.*:]] ; SM_60-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) @@ -448,7 +448,7 @@ define dso_local void @memcpy_from_param_noalign (ptr nocapture noundef writeonl ; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true) ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @memcpy_from_param_noalign( +; SM_70-LABEL: define dso_local ptx_kernel void @memcpy_from_param_noalign( ; SM_70-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_70-NEXT: [[ENTRY:.*:]] ; SM_70-NEXT: [[S3:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(101) @@ -457,7 +457,7 @@ define dso_local void @memcpy_from_param_noalign (ptr nocapture noundef writeonl ; SM_70-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT2]], ptr addrspace(101) [[S3]], i64 16, i1 true) ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @memcpy_from_param_noalign( +; COPY-LABEL: define dso_local ptx_kernel void @memcpy_from_param_noalign( ; COPY-SAME: ptr nocapture noundef writeonly [[OUT:%.*]], ptr nocapture noundef readonly byval([[STRUCT_S:%.*]]) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COPY-NEXT: [[ENTRY:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 8 @@ -472,8 +472,8 @@ entry: } ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr nocapture noundef readnone byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @memcpy_to_param( +define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr nocapture noundef readnone byval(%struct.S) align 4 %s) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @memcpy_to_param( ; SM_60-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef readnone byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_60-NEXT: [[ENTRY:.*:]] ; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -484,7 +484,7 @@ define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr n ; SM_60-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S3]], ptr [[IN2]], i64 16, i1 true) ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @memcpy_to_param( +; SM_70-LABEL: define dso_local ptx_kernel void @memcpy_to_param( ; SM_70-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef readnone byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_70-NEXT: [[ENTRY:.*:]] ; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -495,7 +495,7 @@ define dso_local void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr n ; SM_70-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S3]], ptr [[IN2]], i64 16, i1 true) ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @memcpy_to_param( +; COPY-LABEL: define dso_local ptx_kernel void @memcpy_to_param( ; COPY-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef readnone byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COPY-NEXT: [[ENTRY:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 @@ -510,8 +510,8 @@ entry: } ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) -define dso_local void @copy_on_store(ptr nocapture noundef readonly %in, ptr nocapture noundef byval(%struct.S) align 4 %s, i1 noundef zeroext %b) local_unnamed_addr #0 { -; SM_60-LABEL: define dso_local void @copy_on_store( +define dso_local ptx_kernel void @copy_on_store(ptr nocapture noundef readonly %in, ptr nocapture noundef byval(%struct.S) align 4 %s, i1 noundef zeroext %b) local_unnamed_addr #0 { +; SM_60-LABEL: define dso_local ptx_kernel void @copy_on_store( ; SM_60-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_60-NEXT: [[BB:.*:]] ; SM_60-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -523,7 +523,7 @@ define dso_local void @copy_on_store(ptr nocapture noundef readonly %in, ptr noc ; SM_60-NEXT: store i32 [[I]], ptr [[S3]], align 4 ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define dso_local void @copy_on_store( +; SM_70-LABEL: define dso_local ptx_kernel void @copy_on_store( ; SM_70-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { ; SM_70-NEXT: [[BB:.*:]] ; SM_70-NEXT: [[S3:%.*]] = alloca [[STRUCT_S]], align 4 @@ -535,7 +535,7 @@ define dso_local void @copy_on_store(ptr nocapture noundef readonly %in, ptr noc ; SM_70-NEXT: store i32 [[I]], ptr [[S3]], align 4 ; SM_70-NEXT: ret void ; -; COPY-LABEL: define dso_local void @copy_on_store( +; COPY-LABEL: define dso_local ptx_kernel void @copy_on_store( ; COPY-SAME: ptr nocapture noundef readonly [[IN:%.*]], ptr nocapture noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { ; COPY-NEXT: [[BB:.*:]] ; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 @@ -551,8 +551,8 @@ bb: ret void } -define void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %out, i1 %cond) { -; SM_60-LABEL: define void @test_select( +define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %out, i1 %cond) { +; SM_60-LABEL: define ptx_kernel void @test_select( ; SM_60-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { ; SM_60-NEXT: [[BB:.*:]] ; SM_60-NEXT: [[OUT7:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) @@ -568,7 +568,7 @@ define void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ; SM_60-NEXT: store i32 [[VALLOADED]], ptr [[OUT8]], align 4 ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define void @test_select( +; SM_70-LABEL: define ptx_kernel void @test_select( ; SM_70-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { ; SM_70-NEXT: [[BB:.*:]] ; SM_70-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) @@ -582,7 +582,7 @@ define void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ; SM_70-NEXT: store i32 [[VALLOADED]], ptr [[OUT2]], align 4 ; SM_70-NEXT: ret void ; -; COPY-LABEL: define void @test_select( +; COPY-LABEL: define ptx_kernel void @test_select( ; COPY-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { ; COPY-NEXT: [[BB:.*:]] ; COPY-NEXT: [[INPUT23:%.*]] = alloca i32, align 4 @@ -603,8 +603,8 @@ bb: ret void } -define void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %out, i1 %cond) { -; SM_60-LABEL: define void @test_select_write( +define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %input2, ptr %out, i1 %cond) { +; SM_60-LABEL: define ptx_kernel void @test_select_write( ; SM_60-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; SM_60-NEXT: [[BB:.*:]] ; SM_60-NEXT: [[OUT5:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) @@ -619,7 +619,7 @@ define void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %i ; SM_60-NEXT: store i32 1, ptr [[PTRNEW]], align 4 ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define void @test_select_write( +; SM_70-LABEL: define ptx_kernel void @test_select_write( ; SM_70-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; SM_70-NEXT: [[BB:.*:]] ; SM_70-NEXT: [[OUT5:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) @@ -634,7 +634,7 @@ define void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) %i ; SM_70-NEXT: store i32 1, ptr [[PTRNEW]], align 4 ; SM_70-NEXT: ret void ; -; COPY-LABEL: define void @test_select_write( +; COPY-LABEL: define ptx_kernel void @test_select_write( ; COPY-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; COPY-NEXT: [[BB:.*:]] ; COPY-NEXT: [[INPUT23:%.*]] = alloca i32, align 4 @@ -653,8 +653,8 @@ bb: ret void } -define void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) %input2, ptr %inout, i1 %cond) { -; SM_60-LABEL: define void @test_phi( +define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) %input2, ptr %inout, i1 %cond) { +; SM_60-LABEL: define ptx_kernel void @test_phi( ; SM_60-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; SM_60-NEXT: [[BB:.*:]] ; SM_60-NEXT: [[INOUT7:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1) @@ -678,7 +678,7 @@ define void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) ; SM_60-NEXT: store i32 [[VALLOADED]], ptr [[INOUT8]], align 4 ; SM_60-NEXT: ret void ; -; SM_70-LABEL: define void @test_phi( +; SM_70-LABEL: define ptx_kernel void @test_phi( ; SM_70-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; SM_70-NEXT: [[BB:.*:]] ; SM_70-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1) @@ -700,7 +700,7 @@ define void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) ; SM_70-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4 ; SM_70-NEXT: ret void ; -; COPY-LABEL: define void @test_phi( +; COPY-LABEL: define ptx_kernel void @test_phi( ; COPY-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; COPY-NEXT: [[BB:.*:]] ; COPY-NEXT: [[INPUT23:%.*]] = alloca [[STRUCT_S]], align 8 @@ -740,8 +740,8 @@ merge: ; preds = %second, %first ret void } -define void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) %input2, i1 %cond) { -; COMMON-LABEL: define void @test_phi_write( +define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) %input2, i1 %cond) { +; COMMON-LABEL: define ptx_kernel void @test_phi_write( ; COMMON-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { ; COMMON-NEXT: [[BB:.*:]] ; COMMON-NEXT: [[INPUT24:%.*]] = alloca [[STRUCT_S]], align 8 @@ -784,29 +784,11 @@ attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: readwrite attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) } !llvm.module.flags = !{!0, !1, !2, !3} -!nvvm.annotations = !{!4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !23} !llvm.ident = !{!20, !21} !0 = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 8]} !1 = !{i32 1, !"wchar_size", i32 4} !2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} !3 = !{i32 7, !"frame-pointer", i32 2} -!4 = !{ptr @read_only, !"kernel", i32 1} -!5 = !{ptr @escape_ptr, !"kernel", i32 1} -!6 = !{ptr @escape_ptr_gep, !"kernel", i32 1} -!7 = !{ptr @escape_ptr_store, !"kernel", i32 1} -!8 = !{ptr @escape_ptr_gep_store, !"kernel", i32 1} -!9 = !{ptr @escape_ptrtoint, !"kernel", i32 1} -!10 = !{ptr @memcpy_from_param, !"kernel", i32 1} -!11 = !{ptr @memcpy_to_param, !"kernel", i32 1} -!12 = !{ptr @copy_on_store, !"kernel", i32 1} -!13 = !{ptr @read_only_gep, !"kernel", i32 1} -!14 = !{ptr @read_only_gep_asc, !"kernel", i32 1} -!15 = !{ptr @read_only_gep_asc0, !"kernel", i32 1} -!16 = !{ptr @test_select, !"kernel", i32 1} -!17 = !{ptr @test_phi, !"kernel", i32 1} -!18 = !{ptr @test_phi_write, !"kernel", i32 1} -!19 = !{ptr @test_select_write, !"kernel", i32 1} !20 = !{!"clang version 20.0.0git"} !21 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"} -!23 = !{ptr @memcpy_from_param_noalign, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll index f8b3b4b9b8c44..4ee1ca3ad4b1f 100644 --- a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll +++ b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll @@ -43,7 +43,7 @@ define internal void @bar() { ret void } -; CHECK-LABEL: define weak_odr void @"nvptx$device$init"() { +; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$init"() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_start, align 8 ; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__init_array_end, align 8 @@ -60,7 +60,7 @@ define internal void @bar() { ; CHECK-NEXT: ret void ; ; -; CHECK-LABEL: define weak_odr void @"nvptx$device$fini"() { +; CHECK-LABEL: define weak_odr ptx_kernel void @"nvptx$device$fini"() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[BEGIN:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_start, align 8 ; CHECK-NEXT: [[STOP:%.*]] = load ptr addrspace(1), ptr addrspace(1) @__fini_array_end, align 8 @@ -82,12 +82,10 @@ define internal void @bar() { ; CHECK: while.end: ; CHECK-NEXT: ret void -; CHECK: [[META0:![0-9]+]] = !{ptr @"nvptx$device$init", !"kernel", i32 1} ; CHECK: [[META1:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidx", i32 1} ; CHECK: [[META2:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidy", i32 1} ; CHECK: [[META3:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxntidz", i32 1} ; CHECK: [[META4:![0-9]+]] = !{ptr @"nvptx$device$init", !"maxclusterrank", i32 1} -; CHECK: [[META5:![0-9]+]] = !{ptr @"nvptx$device$fini", !"kernel", i32 1} ; CHECK: [[META6:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidx", i32 1} ; CHECK: [[META7:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidy", i32 1} ; CHECK: [[META8:![0-9]+]] = !{ptr @"nvptx$device$fini", !"maxntidz", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll b/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll index 9ec690a68e7ea..2e64c25594811 100644 --- a/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll +++ b/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll @@ -6,7 +6,7 @@ target triple = "nvptx64-nvidia-cuda" ; Verify that both %input and %output are converted to global pointers and then ; addrspacecast'ed back to the original type. -define void @kernel(ptr %input, ptr %output) { +define ptx_kernel void @kernel(ptr %input, ptr %output) { ; CHECK-LABEL: .visible .entry kernel( ; CHECK: cvta.to.global.u64 ; CHECK: cvta.to.global.u64 @@ -17,7 +17,7 @@ define void @kernel(ptr %input, ptr %output) { ret void } -define void @kernel2(ptr addrspace(1) %input, ptr addrspace(1) %output) { +define ptx_kernel void @kernel2(ptr addrspace(1) %input, ptr addrspace(1) %output) { ; CHECK-LABEL: .visible .entry kernel2( ; CHECK-NOT: cvta.to.global.u64 %1 = load float, ptr addrspace(1) %input, align 4 @@ -29,7 +29,7 @@ define void @kernel2(ptr addrspace(1) %input, ptr addrspace(1) %output) { %struct.S = type { ptr, ptr } -define void @ptr_in_byval_kernel(ptr byval(%struct.S) %input, ptr %output) { +define ptx_kernel void @ptr_in_byval_kernel(ptr byval(%struct.S) %input, ptr %output) { ; CHECK-LABEL: .visible .entry ptr_in_byval_kernel( ; CHECK: ld.param.u64 %[[optr:rd.*]], [ptr_in_byval_kernel_param_1] ; CHECK: cvta.to.global.u64 %[[optr_g:.*]], %[[optr]]; @@ -60,7 +60,3 @@ define void @ptr_in_byval_func(ptr byval(%struct.S) %input, ptr %output) { ret void } -!nvvm.annotations = !{!0, !1, !2} -!0 = !{ptr @kernel, !"kernel", i32 1} -!1 = !{ptr @kernel2, !"kernel", i32 1} -!2 = !{ptr @ptr_in_byval_kernel, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/maxclusterrank.ll b/llvm/test/CodeGen/NVPTX/maxclusterrank.ll index 3389e090aac57..c445c34c1842a 100644 --- a/llvm/test/CodeGen/NVPTX/maxclusterrank.ll +++ b/llvm/test/CodeGen/NVPTX/maxclusterrank.ll @@ -11,16 +11,15 @@ target triple = "nvptx64-unknown-unknown" ; Make sure that for SM version prior to 90 `.maxclusterrank` directive is ; sielently ignored. -define dso_local void @_Z18TestMaxClusterRankv() { +define dso_local ptx_kernel void @_Z18TestMaxClusterRankv() { entry: %a = alloca i32, align 4 store volatile i32 1, ptr %a, align 4 ret void } -!nvvm.annotations = !{!0, !1, !2, !3} +!nvvm.annotations = !{!1, !2, !3} -!0 = !{ptr @_Z18TestMaxClusterRankv, !"kernel", i32 1} !1 = !{ptr @_Z18TestMaxClusterRankv, !"maxntidx", i32 128} !2 = !{ptr @_Z18TestMaxClusterRankv, !"minctasm", i32 2} !3 = !{ptr @_Z18TestMaxClusterRankv, !"maxclusterrank", i32 8} diff --git a/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll index 2bc6d4cfa7f6d..2a0c5ab7299ba 100644 --- a/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll +++ b/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll @@ -66,7 +66,4 @@ if.end17: ; preds = %if.else13, %if.then } ; Function Attrs: noduplicate nounwind -declare void @llvm.nvvm.barrier0() #2 - -!0 = !{ptr @foo, !"kernel", i32 1} -!1 = !{null, !"align", i32 8} +declare void @llvm.nvvm.barrier0() #2 \ No newline at end of file diff --git a/llvm/test/CodeGen/NVPTX/noreturn.ll b/llvm/test/CodeGen/NVPTX/noreturn.ll index 2161d70a88529..6c11d0a9376a3 100644 --- a/llvm/test/CodeGen/NVPTX/noreturn.ll +++ b/llvm/test/CodeGen/NVPTX/noreturn.ll @@ -27,7 +27,7 @@ define void @true_noreturn0() #0 { ; CHECK: .entry ignore_kernel_noreturn() ; CHECK-NOT: .noreturn -define void @ignore_kernel_noreturn() #0 { +define ptx_kernel void @ignore_kernel_noreturn() #0 { unreachable } @@ -35,7 +35,7 @@ define void @ignore_kernel_noreturn() #0 { ; CHECK: prototype_{{[0-9]+}} : .callprototype ()_ (.param .b32 _) .noreturn; ; CHECK: prototype_{{[0-9]+}} : .callprototype (.param .b32 _) _ (.param .b32 _); -define void @callprototype_noreturn(i32) { +define ptx_kernel void @callprototype_noreturn(i32) { %fn = load ptr, ptr addrspace(1) @function_pointer call void %fn(i32 %0) #0 %non_void = bitcast ptr %fn to ptr @@ -44,8 +44,3 @@ define void @callprototype_noreturn(i32) { } attributes #0 = { noreturn } - -!nvvm.annotations = !{!0, !1} - -!0 = !{ptr @ignore_kernel_noreturn, !"kernel", i32 1} -!1 = !{ptr @callprototype_noreturn, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll b/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll index 48162eaba257d..9a78d31302e15 100644 --- a/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll +++ b/llvm/test/CodeGen/NVPTX/nvcl-param-align.ll @@ -3,7 +3,7 @@ target triple = "nvptx-unknown-nvcl" -define void @foo(i64 %img, i64 %sampler, ptr align 32 %v1, ptr %v2) { +define ptx_kernel void @foo(i64 %img, i64 %sampler, ptr align 32 %v1, ptr %v2) { ; The parameter alignment is determined by the align attribute (default 1). ; CHECK-LABEL: .entry foo( ; CHECK: .param .u64 .ptr .align 32 foo_param_2 @@ -11,7 +11,6 @@ define void @foo(i64 %img, i64 %sampler, ptr align 32 %v1, ptr %v2) { ret void } -!nvvm.annotations = !{!1, !2, !3} -!1 = !{ptr @foo, !"kernel", i32 1} +!nvvm.annotations = !{!2, !3} !2 = !{ptr @foo, !"rdoimage", i32 0} !3 = !{ptr @foo, !"sampler", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/refl1.ll b/llvm/test/CodeGen/NVPTX/refl1.ll index 34db3bb1a1a9a..99b83f49ff9b1 100644 --- a/llvm/test/CodeGen/NVPTX/refl1.ll +++ b/llvm/test/CodeGen/NVPTX/refl1.ll @@ -5,7 +5,7 @@ target triple = "nvptx-nvidia-cuda" ; Function Attrs: nounwind ; CHECK: .entry foo -define void @foo(ptr nocapture %a) #0 { +define ptx_kernel void @foo(ptr nocapture %a) #0 { %val = load float, ptr %a %tan = tail call fastcc float @__nv_fast_tanf(float %val) store float %tan, ptr %a @@ -34,7 +34,3 @@ entry: attributes #0 = { nounwind } attributes #1 = { nounwind readnone } attributes #2 = { alwaysinline inlinehint nounwind readnone } - -!nvvm.annotations = !{!0} - -!0 = !{ptr @foo, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/reg-copy.ll b/llvm/test/CodeGen/NVPTX/reg-copy.ll index f66ef195c625b..20396c4cc69fe 100644 --- a/llvm/test/CodeGen/NVPTX/reg-copy.ll +++ b/llvm/test/CodeGen/NVPTX/reg-copy.ll @@ -4,7 +4,7 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" target triple = "nvptx64-unknown-unknown" -define void @PR24303(ptr %f) { +define ptx_kernel void @PR24303(ptr %f) { ; CHECK-LABEL: .visible .entry PR24303( ; Do not use mov.f or mov.u to convert between float and int. ; CHECK-NOT: mov.{{f|u}}{{32|64}} %f{{[0-9]+}}, %r{{[0-9]+}} @@ -217,7 +217,3 @@ _ZN12cuda_builtinmlIfEENS_7complexIT_EERKS3_S5_.exit: ; preds = %if.then.93.i, % } declare float @llvm.nvvm.fabs.f(float) - -!nvvm.annotations = !{!0} - -!0 = !{ptr @PR24303, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/simple-call.ll b/llvm/test/CodeGen/NVPTX/simple-call.ll index 3580604d429de..991ae04b91b67 100644 --- a/llvm/test/CodeGen/NVPTX/simple-call.ll +++ b/llvm/test/CodeGen/NVPTX/simple-call.ll @@ -10,7 +10,7 @@ define float @device_func(float %a) noinline { } ; CHECK: .entry kernel_func -define void @kernel_func(ptr %a) { +define ptx_kernel void @kernel_func(ptr %a) { %val = load float, ptr %a ; CHECK: call.uni (retval0), ; CHECK: device_func, @@ -18,9 +18,3 @@ define void @kernel_func(ptr %a) { store float %mul, ptr %a ret void } - - - -!nvvm.annotations = !{!1} - -!1 = !{ptr @kernel_func, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll index 504dcdeb3370c..7a7904a2f0425 100644 --- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll @@ -10,7 +10,7 @@ declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32) declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1)) -define void @foo(i64 %img, ptr %red, i32 %idx) { +define ptx_kernel void @foo(i64 %img, ptr %red, i32 %idx) { ; CHECK-LABEL: foo( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -34,7 +34,7 @@ define void @foo(i64 %img, ptr %red, i32 %idx) { @surf0 = internal addrspace(1) global i64 0, align 8 -define void @bar(ptr %red, i32 %idx) { +define ptx_kernel void @bar(ptr %red, i32 %idx) { ; CHECK-LABEL: bar( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -56,11 +56,5 @@ define void @bar(ptr %red, i32 %idx) { ret void } - - - -!nvvm.annotations = !{!1, !2, !3} -!1 = !{ptr @foo, !"kernel", i32 1} -!2 = !{ptr @bar, !"kernel", i32 1} -!3 = !{ptr addrspace(1) @surf0, !"surface", i32 1} - +!nvvm.annotations = !{!1} +!1 = !{ptr addrspace(1) @surf0, !"surface", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/surf-read.ll b/llvm/test/CodeGen/NVPTX/surf-read.ll index e0cebd60d7dd0..cd11b5617076b 100644 --- a/llvm/test/CodeGen/NVPTX/surf-read.ll +++ b/llvm/test/CodeGen/NVPTX/surf-read.ll @@ -6,7 +6,7 @@ target triple = "nvptx64-unknown-nvcl" declare i32 @llvm.nvvm.suld.1d.i32.trap(i64, i32) ; CHECK: .entry foo -define void @foo(i64 %img, ptr %red, i32 %idx) { +define ptx_kernel void @foo(i64 %img, ptr %red, i32 %idx) { ; CHECK: suld.b.1d.b32.trap {%r[[RED:[0-9]+]]}, [foo_param_0, {%r{{[0-9]+}}}] %val = tail call i32 @llvm.nvvm.suld.1d.i32.trap(i64 %img, i32 %idx) ; CHECK: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] @@ -16,6 +16,5 @@ define void @foo(i64 %img, ptr %red, i32 %idx) { ret void } -!nvvm.annotations = !{!1, !2} -!1 = !{ptr @foo, !"kernel", i32 1} -!2 = !{ptr @foo, !"rdwrimage", i32 0} +!nvvm.annotations = !{!1} +!1 = !{ptr @foo, !"rdwrimage", i32 0} diff --git a/llvm/test/CodeGen/NVPTX/surf-tex.py b/llvm/test/CodeGen/NVPTX/surf-tex.py index 9607a58856bac..90d67666f1ed6 100644 --- a/llvm/test/CodeGen/NVPTX/surf-tex.py +++ b/llvm/test/CodeGen/NVPTX/surf-tex.py @@ -224,11 +224,6 @@ def get_ptx_surface(target): def get_surface_metadata(target, fun_ty, fun_name, has_surface_param): metadata = [] - md_kernel = '!{{{fun_ty} @{fun_name}, !"kernel", i32 1}}'.format( - fun_ty=fun_ty, fun_name=fun_name - ) - metadata.append(md_kernel) - if target == "cuda": # When a parameter is lowered as a .surfref, it still has the # corresponding ld.param.u64, which is illegal. Do not emit the @@ -263,14 +258,14 @@ def gen_suld_tests(target, global_surf): ; CHECK-LABEL: .entry ${test_name}_param ; CHECK: ${instruction} ${reg_ret}, [${reg_surf}, ${reg_access}] ; - define void @${test_name}_param(i64 %s, ${retty}* %ret, ${access}) { + define ptx_kernel void @${test_name}_param(i64 %s, ${retty}* %ret, ${access}) { %val = tail call ${retty} @${intrinsic}(i64 %s, ${access}) store ${retty} %val, ${retty}* %ret ret void } ; CHECK-LABEL: .entry ${test_name}_global ; CHECK: ${instruction} ${reg_ret}, [${global_surf}, ${reg_access}] - define void @${test_name}_global(${retty}* %ret, ${access}) { + define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) { %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf}) %val = tail call ${retty} @${intrinsic}(i64 %gs, ${access}) store ${retty} %val, ${retty}* %ret @@ -356,13 +351,13 @@ def gen_sust_tests(target, global_surf): ; CHECK-LABEL: .entry ${test_name}_param ; CHECK: ${instruction} [${reg_surf}, ${reg_access}], ${reg_value} ; - define void @${test_name}_param(i64 %s, ${value}, ${access}) { + define ptx_kernel void @${test_name}_param(i64 %s, ${value}, ${access}) { tail call void @${intrinsic}(i64 %s, ${access}, ${value}) ret void } ; CHECK-LABEL: .entry ${test_name}_global ; CHECK: ${instruction} [${global_surf}, ${reg_access}], ${reg_value} - define void @${test_name}_global(${value}, ${access}) { + define ptx_kernel void @${test_name}_global(${value}, ${access}) { %gs = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_surf}) tail call void @${intrinsic}(i64 %gs, ${access}, ${value}) ret void @@ -420,19 +415,13 @@ def gen_sust_tests(target, global_surf): generated_items.append((params["intrinsic"], params["instruction"])) fun_name = test_name + "_param" - fun_ty = "void (i64, {value_ty}, {access_ty})*".format( - value_ty=get_llvm_value_type(vec, ctype), - access_ty=get_llvm_surface_access_type(geom), - ) + fun_ty = "ptr" generated_metadata += get_surface_metadata( target, fun_ty, fun_name, has_surface_param=True ) fun_name = test_name + "_global" - fun_ty = "void ({value_ty}, {access_ty})*".format( - value_ty=get_llvm_value_type(vec, ctype), - access_ty=get_llvm_surface_access_type(geom), - ) + fun_ty = "ptr" generated_metadata += get_surface_metadata( target, fun_ty, fun_name, has_surface_param=False ) @@ -559,11 +548,6 @@ def get_ptx_global_sampler(target, global_sampler): def get_texture_metadata(target, fun_ty, fun_name, has_texture_params): metadata = [] - md_kernel = '!{{{fun_ty} @{fun_name}, !"kernel", i32 1}}'.format( - fun_ty=fun_ty, fun_name=fun_name - ) - metadata.append(md_kernel) - if target == "cuda": # When a parameter is lowered as a .texref, it still has the # corresponding ld.param.u64, which is illegal. Do not emit the @@ -615,14 +599,14 @@ def gen_tex_tests(target, global_tex, global_sampler): ; CHECK-LABEL: .entry ${test_name}_param ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}] - define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) { + define ptx_kernel void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) { %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access}) store ${retty} %val, ${retty}* %ret ret void } ; CHECK-LABEL: .entry ${test_name}_global ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}] - define void @${test_name}_global(${retty}* %ret, ${access}) { + define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) { %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex}) ${get_sampler_handle} %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access}) @@ -799,14 +783,14 @@ def gen_tld4_tests(target, global_tex, global_sampler): ; CHECK-LABEL: .entry ${test_name}_param ; CHECK: ${instruction} ${ptx_ret}, [${ptx_tex}, ${ptx_access}] - define void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) { + define ptx_kernel void @${test_name}_param(i64 %tex, ${sampler} ${retty}* %ret, ${access}) { %val = tail call ${retty} @${intrinsic}(i64 %tex, ${sampler} ${access}) store ${retty} %val, ${retty}* %ret ret void } ; CHECK-LABEL: .entry ${test_name}_global ; CHECK: ${instruction} ${ptx_ret}, [${global_tex}, ${ptx_global_sampler} ${ptx_access}] - define void @${test_name}_global(${retty}* %ret, ${access}) { + define ptx_kernel void @${test_name}_global(${retty}* %ret, ${access}) { %gt = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1i64(i64 addrspace(1)* @${global_tex}) ${get_sampler_handle} %val = tail call ${retty} @${intrinsic}(i64 %gt, ${sampler} ${access}) diff --git a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll index 881ea459feb48..5dc44cb1925b0 100644 --- a/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/surf-write-cuda.ll @@ -10,7 +10,7 @@ declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32) declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1)) -define void @foo(i64 %img, i32 %val, i32 %idx) { +define ptx_kernel void @foo(i64 %img, i32 %val, i32 %idx) { ; CHECK-LABEL: foo( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -30,7 +30,7 @@ define void @foo(i64 %img, i32 %val, i32 %idx) { @surf0 = internal addrspace(1) global i64 0, align 8 -define void @bar(i32 %val, i32 %idx) { +define ptx_kernel void @bar(i32 %val, i32 %idx) { ; CHECK-LABEL: bar( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -47,8 +47,6 @@ define void @bar(i32 %val, i32 %idx) { } -!nvvm.annotations = !{!1, !2, !3} -!1 = !{ptr @foo, !"kernel", i32 1} -!2 = !{ptr @bar, !"kernel", i32 1} -!3 = !{ptr addrspace(1) @surf0, !"surface", i32 1} +!nvvm.annotations = !{!1} +!1 = !{ptr addrspace(1) @surf0, !"surface", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/surf-write.ll b/llvm/test/CodeGen/NVPTX/surf-write.ll index 258bb6d8b5b71..0e1f0cc700993 100644 --- a/llvm/test/CodeGen/NVPTX/surf-write.ll +++ b/llvm/test/CodeGen/NVPTX/surf-write.ll @@ -6,12 +6,11 @@ target triple = "nvptx-unknown-nvcl" declare void @llvm.nvvm.sust.b.1d.i32.trap(i64, i32, i32) ; CHECK: .entry foo -define void @foo(i64 %img, i32 %val, i32 %idx) { +define ptx_kernel void @foo(i64 %img, i32 %val, i32 %idx) { ; CHECK: sust.b.1d.b32.trap [foo_param_0, {%r{{[0-9]+}}}], {%r{{[0-9]+}}} tail call void @llvm.nvvm.sust.b.1d.i32.trap(i64 %img, i32 %idx, i32 %val) ret void } -!nvvm.annotations = !{!1, !2} -!1 = !{ptr @foo, !"kernel", i32 1} -!2 = !{ptr @foo, !"wroimage", i32 0} +!nvvm.annotations = !{!1} +!1 = !{ptr @foo, !"wroimage", i32 0} diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll index ba556d2d9bd6b..61837bde82ece 100644 --- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll @@ -10,7 +10,7 @@ target triple = "nvptx-unknown-cuda" declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64, i32) declare i64 @llvm.nvvm.texsurf.handle.internal.p1(ptr addrspace(1)) -define void @foo(i64 %img, ptr %red, i32 %idx) { +define ptx_kernel void @foo(i64 %img, ptr %red, i32 %idx) { ; CHECK-LABEL: foo( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; @@ -34,7 +34,7 @@ define void @foo(i64 %img, ptr %red, i32 %idx) { @tex0 = internal addrspace(1) global i64 0, align 8 -define void @bar(ptr %red, i32 %idx) { +define ptx_kernel void @bar(ptr %red, i32 %idx) { ; CHECK-LABEL: bar( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; @@ -57,7 +57,7 @@ define void @bar(ptr %red, i32 %idx) { declare float @texfunc(i64) -define void @baz(ptr %red, i32 %idx) { +define ptx_kernel void @baz(ptr %red, i32 %idx) { ; CHECK-LABEL: baz( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<2>; @@ -93,8 +93,5 @@ define void @baz(ptr %red, i32 %idx) { ret void } -!nvvm.annotations = !{!1, !2, !3, !4} -!1 = !{ptr @foo, !"kernel", i32 1} -!2 = !{ptr @bar, !"kernel", i32 1} -!3 = !{ptr addrspace(1) @tex0, !"texture", i32 1} -!4 = !{ptr @baz, !"kernel", i32 1} +!nvvm.annotations = !{!1} +!1 = !{ptr addrspace(1) @tex0, !"texture", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/tex-read.ll b/llvm/test/CodeGen/NVPTX/tex-read.ll index d11aea45a65f0..d74c89f5abc8d 100644 --- a/llvm/test/CodeGen/NVPTX/tex-read.ll +++ b/llvm/test/CodeGen/NVPTX/tex-read.ll @@ -6,7 +6,7 @@ target triple = "nvptx64-unknown-nvcl" declare { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64, i64, i32) ; CHECK: .entry foo -define void @foo(i64 %img, i64 %sampler, ptr %red, i32 %idx) { +define ptx_kernel void @foo(i64 %img, i64 %sampler, ptr %red, i32 %idx) { ; CHECK: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [foo_param_0, foo_param_1, {%r{{[0-9]+}}}] %val = tail call { float, float, float, float } @llvm.nvvm.tex.1d.v4f32.s32(i64 %img, i64 %sampler, i32 %idx) %ret = extractvalue { float, float, float, float } %val, 0 @@ -15,7 +15,6 @@ define void @foo(i64 %img, i64 %sampler, ptr %red, i32 %idx) { ret void } -!nvvm.annotations = !{!1, !2, !3} -!1 = !{ptr @foo, !"kernel", i32 1} +!nvvm.annotations = !{!2, !3} !2 = !{ptr @foo, !"rdoimage", i32 0} !3 = !{ptr @foo, !"sampler", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/unreachable.ll b/llvm/test/CodeGen/NVPTX/unreachable.ll index 286f3588a754f..80cf938d48b53 100644 --- a/llvm/test/CodeGen/NVPTX/unreachable.ll +++ b/llvm/test/CodeGen/NVPTX/unreachable.ll @@ -21,7 +21,7 @@ target triple = "nvptx-unknown-cuda" declare void @throw() #0 declare void @llvm.trap() #0 -define void @kernel_func() { +define ptx_kernel void @kernel_func() { ; NO-TRAP-UNREACHABLE-LABEL: kernel_func( ; NO-TRAP-UNREACHABLE: { ; NO-TRAP-UNREACHABLE-EMPTY: @@ -102,6 +102,3 @@ define void @kernel_func_2() { } attributes #0 = { noreturn } - -!nvvm.annotations = !{!1} -!1 = !{ptr @kernel_func, !"kernel", i32 1} diff --git a/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll b/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll index 26ad59723abf0..82301e42f7d06 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll @@ -4,7 +4,7 @@ @GLOBAL = addrspace(1) externally_initialized global i32 0, align 4, !dbg !0 @SHARED = addrspace(3) externally_initialized global i32 undef, align 4, !dbg !6 -define void @test(float, ptr, ptr, i32) !dbg !17 { +define ptx_kernel void @test(float, ptr, ptr, i32) !dbg !17 { %5 = alloca float, align 4 %6 = alloca ptr, align 8 %7 = alloca ptr, align 8 @@ -38,7 +38,6 @@ define void @test(float, ptr, ptr, i32) !dbg !17 { declare void @llvm.dbg.declare(metadata, metadata, metadata) !llvm.dbg.cu = !{!2} -!nvvm.annotations = !{!10} !llvm.module.flags = !{!11, !12, !13, !14, !15} !llvm.ident = !{!16} @@ -52,7 +51,6 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) !7 = distinct !DIGlobalVariable(name: "SHARED", scope: !2, file: !8, line: 4, type: !9, isLocal: false, isDefinition: true) !8 = !DIFile(filename: "test.cu", directory: "/tmp") !9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!10 = !{ptr @test, !"kernel", i32 1} !11 = !{i32 2, !"Dwarf Version", i32 2} !12 = !{i32 2, !"Debug Info Version", i32 3} !13 = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll index 55c81caaed056..c926229f96e38 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -59,7 +59,7 @@ ; CHECK: } ; Function Attrs: nounwind -define void @_Z5saxpyifPfS_(i32 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) local_unnamed_addr #0 !dbg !566 { +define ptx_kernel void @_Z5saxpyifPfS_(i32 %n, float %a, ptr nocapture readonly %x, ptr nocapture %y) local_unnamed_addr #0 !dbg !566 { entry: call void @llvm.dbg.value(metadata i32 %n, metadata !570, metadata !DIExpression()), !dbg !575 call void @llvm.dbg.value(metadata float %a, metadata !571, metadata !DIExpression()), !dbg !576 @@ -8496,7 +8496,6 @@ attributes #2 = { nounwind readnone speculatable } attributes #3 = { nounwind } !llvm.dbg.cu = !{!0} -!nvvm.annotations = !{!555, !556, !557, !556, !558, !558, !558, !558, !559, !559, !558} !llvm.module.flags = !{!560, !561, !562, !563} !llvm.ident = !{!564} !nvvm.internalize.after.link = !{} @@ -9057,11 +9056,6 @@ attributes #3 = { nounwind } !552 = !DISubprogram(name: "tgammaf", linkageName: "_ZL7tgammaff", scope: !444, file: !444, line: 1592, type: !13, isLocal: true, isDefinition: false, flags: DIFlagPrototyped, isOptimized: true) !553 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !5, entity: !554, file: !445, line: 459) !554 = !DISubprogram(name: "truncf", linkageName: "_ZL6truncff", scope: !462, file: !462, line: 662, type: !13, isLocal: true, isDefinition: false, flags: DIFlagPrototyped, isOptimized: true) -!555 = !{ptr @_Z5saxpyifPfS_, !"kernel", i32 1} -!556 = !{null, !"align", i32 8} -!557 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!558 = !{null, !"align", i32 16} -!559 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} !560 = !{i32 2, !"Dwarf Version", i32 2} !561 = !{i32 2, !"Debug Info Version", i32 3} !562 = !{i32 1, !"wchar_size", i32 4} diff --git a/llvm/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll b/llvm/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll index 8761122f756fc..e6b5991d8dfb3 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll @@ -13,7 +13,7 @@ target triple = "nvptx64-nvidia-cuda" ; That would be worthless, because "i" is simulated by two 32-bit registers and ; truncating it to 32-bit is as simple as directly using the register that ; contains the low bits. -define void @trunc_is_free(i64 %begin, i64 %stride, i64 %end) { +define ptx_kernel void @trunc_is_free(i64 %begin, i64 %stride, i64 %end) { ; CHECK-LABEL: @trunc_is_free( entry: %cmp.4 = icmp eq i64 %begin, %end @@ -41,5 +41,3 @@ for.body: ; preds = %for.body.preheader, declare void @_Z3usei(i32) -!nvvm.annotations = !{!0} -!0 = !{ptr @trunc_is_free, !"kernel", i32 1} diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll b/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll index 92766d5a11aa5..420e844b51039 100644 --- a/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll +++ b/llvm/test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll @@ -11,7 +11,7 @@ target triple = "nvptx64-nvidia-cuda" ; use((b + i) * s); ; } ; } -define void @foo(i32 %b, i32 %s) { +define ptx_kernel void @foo(i32 %b, i32 %s) { ; CHECK-LABEL: .visible .entry foo( entry: ; CHECK: ld.param.u32 [[s:%r[0-9]+]], [foo_param_1]; @@ -65,7 +65,3 @@ for.inc.3: ; preds = %if.then.3, %for.inc declare zeroext i1 @cond(i32) declare void @use(i32) - -!nvvm.annotations = !{!0} - -!0 = !{ptr @foo, !"kernel", i32 1} diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp index cf58bc5d8f475..659ab1227f113 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp @@ -237,15 +237,7 @@ class NVVMDialectLLVMIRTranslationInterface generateMetadata(value.getInt(), "maxnreg"); } else if (attribute.getName() == NVVM::NVVMDialect::getKernelFuncAttrName()) { - llvm::Metadata *llvmMetadataKernel[] = { - llvm::ValueAsMetadata::get(llvmFunc), - llvm::MDString::get(llvmContext, "kernel"), - llvm::ValueAsMetadata::get( - llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext), 1))}; - llvm::MDNode *llvmMetadataNode = - llvm::MDNode::get(llvmContext, llvmMetadataKernel); - moduleTranslation.getOrInsertNamedModuleMetadata("nvvm.annotations") - ->addOperand(llvmMetadataNode); + llvmFunc->setCallingConv(llvm::CallingConv::PTX_Kernel); } return success(); } diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index b69d77496351c..2d7710e7cbf27 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -556,9 +556,7 @@ llvm.func @kernel_func() attributes {nvvm.kernel} { llvm.return } -// CHECK: !nvvm.annotations = -// CHECK-NOT: {ptr @nvvm_special_regs, !"kernel", i32 1} -// CHECK: {ptr @kernel_func, !"kernel", i32 1} +// CHECK: ptx_kernel void @kernel_func // ----- @@ -566,9 +564,8 @@ llvm.func @kernel_func() attributes {nvvm.kernel, nvvm.maxntid = array