From d41faf6da8a9eed8c32f6a62fa9ebf38d5824c2c Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sun, 11 Aug 2024 01:39:46 +0300 Subject: [PATCH 01/12] Tweak AMDGCNSPIRV ABI to allow for the correct handling of aggregates passed to kernels / functions. --- clang/lib/CodeGen/Targets/SPIR.cpp | 73 +- .../amdgpu-kernel-arg-pointer-type.cu | 723 ++++++++++++++++-- clang/test/CodeGenCUDA/kernel-args.cu | 6 + 3 files changed, 731 insertions(+), 71 deletions(-) diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index cf068cbc4fcd3..1319332635b86 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -32,7 +32,9 @@ class SPIRVABIInfo : public CommonSPIRABIInfo { void computeInfo(CGFunctionInfo &FI) const override; private: + ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyKernelArgumentType(QualType Ty) const; + ABIArgInfo classifyArgumentType(QualType Ty) const; }; } // end anonymous namespace namespace { @@ -64,6 +66,27 @@ void CommonSPIRABIInfo::setCCs() { RuntimeCC = llvm::CallingConv::SPIR_FUNC; } +ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const { + if (getTarget().getTriple().getVendor() != llvm::Triple::AMD) + return DefaultABIInfo::classifyReturnType(RetTy); + if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI())) + return DefaultABIInfo::classifyReturnType(RetTy); + + if (const RecordType *RT = RetTy->getAs()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyReturnType(RetTy); + } + + // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to + // avoid encoding various architecture specific bits here we return everything + // as direct to retain type info for things like aggregates, for later perusal + // when translating back to LLVM/lowering in the BE. This is also why we + // disable flattening as the outcomes can mismatch between SPIR-V and AMDGPU. + // This will be revisited / optimised in the future. + return ABIArgInfo::getDirect(CGT.ConvertType(RetTy), 0u, nullptr, false); +} + ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const { if (getContext().getLangOpts().CUDAIsDevice) { // Coerce pointer arguments with default address space to CrossWorkGroup @@ -78,18 +101,52 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const { return ABIArgInfo::getDirect(LTy, 0, nullptr, false); } - // Force copying aggregate type in kernel arguments by value when - // compiling CUDA targeting SPIR-V. This is required for the object - // copied to be valid on the device. - // This behavior follows the CUDA spec - // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing, - // and matches the NVPTX implementation. - if (isAggregateTypeForABI(Ty)) - return getNaturalAlignIndirect(Ty, /* byval */ true); + if (isAggregateTypeForABI(Ty)) { + if (getTarget().getTriple().getVendor() == llvm::Triple::AMD) + // TODO: The AMDGPU kernel ABI passes aggregates byref, which is not + // currently expressible in SPIR-V; SPIR-V passes aggregates byval, + // which the AMDGPU kernel ABI does not allow. Passing aggregates as + // direct works around this impedance mismatch, as it retains type info + // and can be correctly handled, post reverse-translation, by the AMDGPU + // BE, which has to support this CC for legacy OpenCL purposes. It can + // be brittle and does lead to performance degradation in certain + // pathological cases. This will be revisited / optimised in the future, + // once a way to deal with the byref/byval impedance mismatch is + // identified. + return ABIArgInfo::getDirect(LTy, 0, nullptr, false); + else + // Force copying aggregate type in kernel arguments by value when + // compiling CUDA targeting SPIR-V. This is required for the object + // copied to be valid on the device. + // This behavior follows the CUDA spec + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing, + // and matches the NVPTX implementation. + return getNaturalAlignIndirect(Ty, /* byval */ true); + } } return classifyArgumentType(Ty); } +ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const { + if (getTarget().getTriple().getVendor() != llvm::Triple::AMD) + return DefaultABIInfo::classifyArgumentType(Ty); + if (!isAggregateTypeForABI(Ty)) + return DefaultABIInfo::classifyArgumentType(Ty); + + // Records with non-trivial destructors/copy-constructors should not be + // passed by value. + if (auto RAA = getRecordArgABI(Ty, getCXXABI())) + return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + + if (const RecordType *RT = Ty->getAs()) { + const RecordDecl *RD = RT->getDecl(); + if (RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyArgumentType(Ty); + } + + return ABIArgInfo::getDirect(CGT.ConvertType(Ty), 0u, nullptr, false); +} + void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const { // The logic is same as in DefaultABIInfo with an exception on the kernel // arguments handling. diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu index 70c86cbb8c3d4..b295bbbdaaf95 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -1,8 +1,11 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // REQUIRES: x86-registered-target // REQUIRES: amdgpu-registered-target -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s -// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=CHECK %s +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=CHECK-SPIRV %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=OPT +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=OPT-SPIRV // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s #include "Inputs/cuda.h" @@ -11,41 +14,260 @@ // global ones. // On the host-side compilation, generic pointer won't be coerced. -// HOST-NOT: %struct.S.coerce -// HOST-NOT: %struct.T.coerce - -// HOST: define{{.*}} void @_Z22__device_stub__kernel1Pi(ptr noundef %x) -// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(ptr addrspace(1){{.*}} %x.coerce) -// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr -// OPT: [[VAL:%.*]] = load i32, ptr addrspace(1) %x.coerce, align 4{{$}} -// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1 -// OPT: store i32 [[INC]], ptr addrspace(1) %x.coerce, align 4 -// OPT: ret void + +// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel1Pi( +// CHECK-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8 +// CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: ret void +// +// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi( +// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] { +// CHECK-SPIRV-NEXT: [[ENTRY:.*:]] +// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4 +// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4 +// CHECK-SPIRV-NEXT: ret void +// +// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel1Pi( +// OPT-SAME: ptr addrspace(1) nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-NEXT: ret void +// +// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi( +// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] { +// OPT-SPIRV-NEXT: [[ENTRY:.*:]] +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) +// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: ret void +// +// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel1Pi( +// HOST-SAME: ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0) +// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] +// HOST: [[SETUP_NEXT]]: +// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel1Pi) +// HOST-NEXT: br label %[[SETUP_END]] +// HOST: [[SETUP_END]]: +// HOST-NEXT: ret void +// __global__ void kernel1(int *x) { x[0]++; } -// HOST: define{{.*}} void @_Z22__device_stub__kernel2Ri(ptr noundef nonnull align 4 dereferenceable(4) %x) -// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(ptr addrspace(1){{.*}} nonnull align 4 dereferenceable(4) %x.coerce) -// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr -// OPT: [[VAL:%.*]] = load i32, ptr addrspace(1) %x.coerce, align 4{{$}} -// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1 -// OPT: store i32 [[INC]], ptr addrspace(1) %x.coerce, align 4 -// OPT: ret void +// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel2Ri( +// CHECK-SAME: ptr addrspace(1) noundef nonnull align 4 dereferenceable(4) [[X_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8 +// CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK-NEXT: ret void +// +// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri( +// CHECK-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] { +// CHECK-SPIRV-NEXT: [[ENTRY:.*:]] +// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4 +// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4 +// CHECK-SPIRV-NEXT: ret void +// +// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel2Ri( +// OPT-SAME: ptr addrspace(1) nocapture noundef nonnull align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-NEXT: ret void +// +// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri( +// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-NEXT: [[ENTRY:.*:]] +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) +// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: ret void +// +// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel2Ri( +// HOST-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0) +// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] +// HOST: [[SETUP_NEXT]]: +// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel2Ri) +// HOST-NEXT: br label %[[SETUP_END]] +// HOST: [[SETUP_END]]: +// HOST-NEXT: ret void +// __global__ void kernel2(int &x) { x++; } -// HOST: define{{.*}} void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(ptr addrspace(2) noundef %x, ptr addrspace(1) noundef %y) -// CHECK-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(ptr addrspace(2){{.*}} %x, ptr addrspace(1){{.*}} %y) -// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr +// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i( +// CHECK-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8, addrspace(5) +// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// CHECK-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[Y_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(2) [[X]], ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store ptr addrspace(1) [[Y]], ptr [[Y_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(2), ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[TMP0]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[Y_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP2]], i64 0 +// CHECK-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX1]], align 4 +// CHECK-NEXT: ret void +// +// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i( +// CHECK-SPIRV-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) addrspace(4) #[[ATTR0]] { +// CHECK-SPIRV-NEXT: [[ENTRY:.*:]] +// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8 +// CHECK-SPIRV-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: [[Y_ADDR_ASCAST:%.*]] = addrspacecast ptr [[Y_ADDR]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: store ptr addrspace(2) [[X]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[Y]], ptr addrspace(4) [[Y_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(2), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[TMP0]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(2) [[ARRAYIDX]], align 4 +// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[Y_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP2]], i64 0 +// CHECK-SPIRV-NEXT: store i32 [[TMP1]], ptr addrspace(1) [[ARRAYIDX1]], align 4 +// CHECK-SPIRV-NEXT: ret void +// +// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i( +// OPT-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4 +// OPT-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4 +// OPT-NEXT: ret void +// +// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i( +// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] { +// OPT-SPIRV-NEXT: [[ENTRY:.*:]] +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4 +// OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4 +// OPT-SPIRV-NEXT: ret void +// +// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i( +// HOST-SAME: ptr addrspace(2) noundef [[X:%.*]], ptr addrspace(1) noundef [[Y:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(2), align 8 +// HOST-NEXT: [[Y_ADDR:%.*]] = alloca ptr addrspace(1), align 8 +// HOST-NEXT: store ptr addrspace(2) [[X]], ptr [[X_ADDR]], align 8 +// HOST-NEXT: store ptr addrspace(1) [[Y]], ptr [[Y_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0) +// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] +// HOST: [[SETUP_NEXT]]: +// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[Y_ADDR]], i64 8, i64 8) +// HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 +// HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT1:.*]], label %[[SETUP_END]] +// HOST: [[SETUP_NEXT1]]: +// HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel3PU3AS2iPU3AS1i) +// HOST-NEXT: br label %[[SETUP_END]] +// HOST: [[SETUP_END]]: +// HOST-NEXT: ret void +// __global__ void kernel3(__attribute__((address_space(2))) int *x, __attribute__((address_space(1))) int *y) { y[0] = x[0]; } -// COMMON-LABEL: define{{.*}} void @_Z4funcPi(ptr{{.*}} %x) -// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr +// CHECK-LABEL: define dso_local void @_Z4funcPi( +// CHECK-SAME: ptr noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: ret void +// +// CHECK-SPIRV-LABEL: define spir_func void @_Z4funcPi( +// CHECK-SPIRV-SAME: ptr addrspace(4) noundef [[X:%.*]]) addrspace(4) #[[ATTR1:[0-9]+]] { +// CHECK-SPIRV-NEXT: [[ENTRY:.*:]] +// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4 +// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4 +// CHECK-SPIRV-NEXT: ret void +// +// OPT-LABEL: define dso_local void @_Z4funcPi( +// OPT-SAME: ptr nocapture noundef [[X:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4 +// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-NEXT: store i32 [[INC]], ptr [[X]], align 4 +// OPT-NEXT: ret void +// +// OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi( +// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] { +// OPT-SPIRV-NEXT: [[ENTRY:.*:]] +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[X]], align 4 +// OPT-SPIRV-NEXT: ret void +// __device__ void func(int *x) { x[0]++; } @@ -57,29 +279,202 @@ struct S { // `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect // by-val). However, the enhanced address inferring pass should be able to // assume they are global pointers. +// For SPIR-V, since byref is not supported at the moment, we pass it as direct. + +// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel41S( +// CHECK-SAME: ptr addrspace(4) noundef byref([[STRUCT_S:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_S]], align 8, addrspace(5) +// CHECK-NEXT: [[S:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[S]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false) +// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 1 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[Y]], align 8 +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP4]], 1.000000e+00 +// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX1]], align 4 +// CHECK-NEXT: ret void +// +// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S( +// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] { +// CHECK-SPIRV-NEXT: [[ENTRY:.*:]] +// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK-SPIRV-NEXT: [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0 +// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0 +// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8 +// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1 +// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1 +// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP2]], align 8 +// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0 +// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP4]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4 +// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4 +// CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1 +// CHECK-SPIRV-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP6]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP7:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4 +// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP7]], 1.000000e+00 +// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4 +// CHECK-SPIRV-NEXT: ret void +// +// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel41S( +// OPT-SAME: ptr addrspace(4) nocapture noundef readonly byref([[STRUCT_S:%.*]]) align 8 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8, !amdgpu.noclobber [[META4:![0-9]+]] +// OPT-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[COERCE_SROA_0_0_COPYLOAD]] to ptr addrspace(1) +// OPT-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 8 +// OPT-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[COERCE_SROA_2_0__SROA_IDX]], align 8, !amdgpu.noclobber [[META4]] +// OPT-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[COERCE_SROA_2_0_COPYLOAD]] to ptr addrspace(1) +// OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4, !amdgpu.noclobber [[META4]] +// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[TMP1]], align 4 +// OPT-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(1) [[TMP2]], align 4 +// OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP4]], 1.000000e+00 +// OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[TMP2]], align 4 +// OPT-NEXT: ret void +// +// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S( +// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-NEXT: [[ENTRY:.*:]] +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1 +// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4 +// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 +// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: ret void +// +// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel41S( +// HOST-SAME: ptr [[S_COERCE0:%.*]], ptr [[S_COERCE1:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[S]], i32 0, i32 0 +// HOST-NEXT: store ptr [[S_COERCE0]], ptr [[TMP0]], align 8 +// HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[S]], i32 0, i32 1 +// HOST-NEXT: store ptr [[S_COERCE1]], ptr [[TMP1]], align 8 +// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[S]], i64 16, i64 0) +// HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 +// HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] +// HOST: [[SETUP_NEXT]]: +// HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel41S) +// HOST-NEXT: br label %[[SETUP_END]] +// HOST: [[SETUP_END]]: +// HOST-NEXT: ret void // -// HOST: define{{.*}} void @_Z22__device_stub__kernel41S(ptr %s.coerce0, ptr %s.coerce1) -// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel41S(ptr addrspace(4){{.*}} byref(%struct.S) align 8 %0) -// OPT: [[P0:%.*]] = load ptr, ptr addrspace(4) %0, align 8 -// OPT: [[G0:%.*]] ={{.*}} addrspacecast ptr [[P0]] to ptr addrspace(1) -// OPT: [[R1:%.*]] = getelementptr inbounds i8, ptr addrspace(4) %0, i64 8 -// OPT: [[P1:%.*]] = load ptr, ptr addrspace(4) [[R1]], align 8 -// OPT: [[G1:%.*]] ={{.*}} addrspacecast ptr [[P1]] to ptr addrspace(1) -// OPT: [[V0:%.*]] = load i32, ptr addrspace(1) [[G0]], align 4, !amdgpu.noclobber ![[MD:[0-9]+]] -// OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1 -// OPT: store i32 [[INC]], ptr addrspace(1) [[G0]], align 4 -// OPT: [[V1:%.*]] = load float, ptr addrspace(1) [[G1]], align 4 -// OPT: [[ADD:%.*]] = fadd contract float [[V1]], 1.000000e+00 -// OPT: store float [[ADD]], ptr addrspace(1) [[G1]], align 4 -// OPT: ret void __global__ void kernel4(struct S s) { s.x[0]++; s.y[0] += 1.f; } // If a pointer to struct is passed, only the pointer itself is coerced into the global one. -// HOST: define{{.*}} void @_Z22__device_stub__kernel5P1S(ptr noundef %s) -// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel5P1S(ptr addrspace(1){{.*}} %s.coerce) + +// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S( +// CHECK-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[S:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S]] to ptr +// CHECK-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[S_COERCE]], ptr [[S_ASCAST]], align 8 +// CHECK-NEXT: [[S1:%.*]] = load ptr, ptr [[S_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[S1]], ptr [[S_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[S_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[Y]], align 8 +// CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 0 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00 +// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4 +// CHECK-NEXT: ret void +// +// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S( +// CHECK-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] { +// CHECK-SPIRV-NEXT: [[ENTRY:.*:]] +// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-SPIRV-NEXT: [[S_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-SPIRV-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: [[S_ADDR_ASCAST:%.*]] = addrspacecast ptr [[S_ADDR]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[S_COERCE]], ptr addrspace(4) [[S_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[S1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[S1]], ptr addrspace(4) [[S_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S:%.*]], ptr addrspace(4) [[TMP0]], i32 0, i32 0 +// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4 +// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 +// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4 +// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[S_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[TMP3]], i32 0, i32 1 +// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP4]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4 +// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00 +// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4 +// CHECK-SPIRV-NEXT: ret void +// +// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel5P1S( +// OPT-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr #[[ATTR2]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(1) [[S_COERCE]], align 8 +// OPT-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 +// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// OPT-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// OPT-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[S_COERCE]], i64 8 +// OPT-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(1) [[Y]], align 8 +// OPT-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4 +// OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 +// OPT-NEXT: store float [[ADD]], ptr [[TMP2]], align 4 +// OPT-NEXT: ret void +// +// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S( +// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-NEXT: [[ENTRY:.*:]] +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) +// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[TMP1]], align 8 +// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP2]], align 4 +// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8 +// OPT-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8 +// OPT-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[TMP4]], align 4 +// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00 +// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP4]], align 4 +// OPT-SPIRV-NEXT: ret void +// +// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel5P1S( +// HOST-SAME: ptr noundef [[S:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[S_ADDR]], i64 8, i64 0) +// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] +// HOST: [[SETUP_NEXT]]: +// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel5P1S) +// HOST-NEXT: br label %[[SETUP_END]] +// HOST: [[SETUP_END]]: +// HOST-NEXT: ret void +// __global__ void kernel5(struct S *s) { s->x[0]++; s->y[0] += 1.f; @@ -91,29 +486,174 @@ struct T { // `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect // by-val). However, the enhanced address inferring pass should be able to // assume they are global pointers. +// For SPIR-V, since byref is not supported at the moment, we pass it as direct. + +// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel61T( +// CHECK-SAME: ptr addrspace(4) noundef byref([[STRUCT_T:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_T]], align 8, addrspace(5) +// CHECK-NEXT: [[T:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr +// CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr align 8 [[T]], ptr addrspace(4) align 8 [[TMP0]], i64 16, i1 false) +// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr [[T]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr], ptr [[X]], i64 0, i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 0 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP2]], 1.000000e+00 +// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX1]], align 4 +// CHECK-NEXT: [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr [[T]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[X2]], i64 0, i64 1 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 +// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP4]], 2.000000e+00 +// CHECK-NEXT: store float [[ADD5]], ptr [[ARRAYIDX4]], align 4 +// CHECK-NEXT: ret void +// +// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T( +// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] { +// CHECK-SPIRV-NEXT: [[ENTRY:.*:]] +// CHECK-SPIRV-NEXT: [[T:%.*]] = alloca [[STRUCT_T]], align 8 +// CHECK-SPIRV-NEXT: [[T1:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0 +// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0 +// CHECK-SPIRV-NEXT: store [2 x ptr addrspace(4)] [[TMP1]], ptr addrspace(4) [[TMP0]], align 8 +// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0 +// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X]], i64 0, i64 0 +// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4 +// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 +// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4 +// CHECK-SPIRV-NEXT: [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0 +// CHECK-SPIRV-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X3]], i64 0, i64 1 +// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX4]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP4]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX5]], align 4 +// CHECK-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP5]], 2.000000e+00 +// CHECK-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[ARRAYIDX5]], align 4 +// CHECK-SPIRV-NEXT: ret void +// +// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel61T( +// OPT-SAME: ptr addrspace(4) nocapture noundef readonly byref([[STRUCT_T:%.*]]) align 8 [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[TMP0]], align 8, !amdgpu.noclobber [[META4]] +// OPT-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[COERCE_SROA_0_0_COPYLOAD]] to ptr addrspace(1) +// OPT-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 8 +// OPT-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr, ptr addrspace(4) [[COERCE_SROA_2_0__SROA_IDX]], align 8, !amdgpu.noclobber [[META4]] +// OPT-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[COERCE_SROA_2_0_COPYLOAD]] to ptr addrspace(1) +// OPT-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(1) [[TMP1]], align 4, !amdgpu.noclobber [[META4]] +// OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 +// OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[TMP1]], align 4 +// OPT-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(1) [[TMP2]], align 4 +// OPT-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP4]], 2.000000e+00 +// OPT-NEXT: store float [[ADD5]], ptr addrspace(1) [[TMP2]], align 4 +// OPT-NEXT: ret void +// +// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T( +// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-NEXT: [[ENTRY:.*:]] +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0 +// OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0 +// OPT-SPIRV-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 1 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4 +// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 1.000000e+00 +// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4 +// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4 +// OPT-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP2]], 2.000000e+00 +// OPT-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4 +// OPT-SPIRV-NEXT: ret void +// +// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel61T( +// HOST-SAME: ptr [[T_COERCE0:%.*]], ptr [[T_COERCE1:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[T:%.*]] = alloca [[STRUCT_T:%.*]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[T]], i32 0, i32 0 +// HOST-NEXT: store ptr [[T_COERCE0]], ptr [[TMP0]], align 8 +// HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw { ptr, ptr }, ptr [[T]], i32 0, i32 1 +// HOST-NEXT: store ptr [[T_COERCE1]], ptr [[TMP1]], align 8 +// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipSetupArgument(ptr [[T]], i64 16, i64 0) +// HOST-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 +// HOST-NEXT: br i1 [[TMP3]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] +// HOST: [[SETUP_NEXT]]: +// HOST-NEXT: [[TMP4:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel61T) +// HOST-NEXT: br label %[[SETUP_END]] +// HOST: [[SETUP_END]]: +// HOST-NEXT: ret void // -// HOST: define{{.*}} void @_Z22__device_stub__kernel61T(ptr %t.coerce0, ptr %t.coerce1) -// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel61T(ptr addrspace(4){{.*}} byref(%struct.T) align 8 %0) -// OPT: [[P0:%.*]] = load ptr, ptr addrspace(4) %0, align 8 -// OPT: [[G0:%.*]] ={{.*}} addrspacecast ptr [[P0]] to ptr addrspace(1) -// OPT: [[R1:%.*]] = getelementptr inbounds i8, ptr addrspace(4) %0, i64 8 -// OPT: [[P1:%.*]] = load ptr, ptr addrspace(4) [[R1]], align 8 -// OPT: [[G1:%.*]] ={{.*}} addrspacecast ptr [[P1]] to ptr addrspace(1) -// OPT: [[V0:%.*]] = load float, ptr addrspace(1) [[G0]], align 4, !amdgpu.noclobber ![[MD]] -// OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00 -// OPT: store float [[ADD0]], ptr addrspace(1) [[G0]], align 4 -// OPT: [[V1:%.*]] = load float, ptr addrspace(1) [[G1]], align 4 -// OPT: [[ADD1:%.*]] = fadd contract float [[V1]], 2.000000e+00 -// OPT: store float [[ADD1]], ptr addrspace(1) [[G1]], align 4 -// OPT: ret void __global__ void kernel6(struct T t) { t.x[0][0] += 1.f; t.x[1][0] += 2.f; } // Check that coerced pointers retain the noalias attribute when qualified with __restrict. -// HOST: define{{.*}} void @_Z22__device_stub__kernel7Pi(ptr noalias noundef %x) -// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel7Pi(ptr addrspace(1) noalias{{.*}} %x.coerce) + +// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel7Pi( +// CHECK-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X]] to ptr +// CHECK-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8 +// CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8 +// CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: ret void +// +// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi( +// CHECK-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) addrspace(4) #[[ATTR0]] { +// CHECK-SPIRV-NEXT: [[ENTRY:.*:]] +// CHECK-SPIRV-NEXT: [[X:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-SPIRV-NEXT: [[X_ADDR:%.*]] = alloca ptr addrspace(4), align 8 +// CHECK-SPIRV-NEXT: [[X_ASCAST:%.*]] = addrspacecast ptr [[X]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr [[X_ADDR]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP0]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4 +// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4 +// CHECK-SPIRV-NEXT: ret void +// +// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel7Pi( +// OPT-SAME: ptr addrspace(1) noalias nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-NEXT: ret void +// +// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi( +// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-NEXT: [[ENTRY:.*:]] +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) +// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: ret void +// +// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel7Pi( +// HOST-SAME: ptr noalias noundef [[X:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// HOST-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[X_ADDR]], i64 8, i64 0) +// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] +// HOST: [[SETUP_NEXT]]: +// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel7Pi) +// HOST-NEXT: br label %[[SETUP_END]] +// HOST: [[SETUP_END]]: +// HOST-NEXT: ret void +// __global__ void kernel7(int *__restrict x) { x[0]++; } @@ -122,13 +662,70 @@ __global__ void kernel7(int *__restrict x) { struct SS { float *x; }; -// HOST: define{{.*}} void @_Z22__device_stub__kernel82SS(ptr %a.coerce) -// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(ptr addrspace(1){{.*}} %a.coerce) -// CHECK-NOT: ={{.*}} addrspacecast ptr addrspace(1) %{{.*}} to ptr -// OPT: [[VAL:%.*]] = load float, ptr addrspace(1) %a.coerce, align 4{{$}} -// OPT: [[INC:%.*]] = fadd contract float [[VAL]], 3.000000e+00 -// OPT: store float [[INC]], ptr addrspace(1) %a.coerce, align 4 -// OPT: ret void +// CHECK-LABEL: define dso_local amdgpu_kernel void @_Z7kernel82SS( +// CHECK-SAME: ptr addrspace(1) [[A_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_SS:%.*]], align 8, addrspace(5) +// CHECK-NEXT: [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr +// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A1]], i32 0, i32 0 +// CHECK-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 +// CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00 +// CHECK-NEXT: store float [[ADD]], ptr [[TMP0]], align 4 +// CHECK-NEXT: ret void +// +// CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS( +// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] { +// CHECK-SPIRV-NEXT: [[ENTRY:.*:]] +// CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS]], align 8 +// CHECK-SPIRV-NEXT: [[A1:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0 +// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0 +// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8 +// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0 +// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8 +// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4 +// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 3.000000e+00 +// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4 +// CHECK-SPIRV-NEXT: ret void +// +// OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel82SS( +// OPT-SAME: ptr addrspace(1) nocapture [[A_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// OPT-NEXT: [[ENTRY:.*:]] +// OPT-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(1) [[A_COERCE]], align 4 +// OPT-NEXT: [[ADD:%.*]] = fadd contract float [[TMP0]], 3.000000e+00 +// OPT-NEXT: store float [[ADD]], ptr addrspace(1) [[A_COERCE]], align 4 +// OPT-NEXT: ret void +// +// OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS( +// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-NEXT: [[ENTRY:.*:]] +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4 +// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00 +// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP0]], align 4 +// OPT-SPIRV-NEXT: ret void +// +// HOST-LABEL: define dso_local void @_Z22__device_stub__kernel82SS( +// HOST-SAME: ptr [[A_COERCE:%.*]]) #[[ATTR0]] { +// HOST-NEXT: [[ENTRY:.*:]] +// HOST-NEXT: [[A:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 +// HOST-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr [[A]], i32 0, i32 0 +// HOST-NEXT: store ptr [[A_COERCE]], ptr [[COERCE_DIVE]], align 8 +// HOST-NEXT: [[TMP0:%.*]] = call i32 @hipSetupArgument(ptr [[A]], i64 8, i64 0) +// HOST-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +// HOST-NEXT: br i1 [[TMP1]], label %[[SETUP_NEXT:.*]], label %[[SETUP_END:.*]] +// HOST: [[SETUP_NEXT]]: +// HOST-NEXT: [[TMP2:%.*]] = call i32 @hipLaunchByPtr(ptr @_Z7kernel82SS) +// HOST-NEXT: br label %[[SETUP_END]] +// HOST: [[SETUP_END]]: +// HOST-NEXT: ret void +// __global__ void kernel8(struct SS a) { *a.x += 3.f; } +//. +// OPT: [[META4]] = !{} +//. diff --git a/clang/test/CodeGenCUDA/kernel-args.cu b/clang/test/CodeGenCUDA/kernel-args.cu index bcce729f14481..8d17d89b315de 100644 --- a/clang/test/CodeGenCUDA/kernel-args.cu +++ b/clang/test/CodeGenCUDA/kernel-args.cu @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device \ // RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCN %s +// RUN: %clang_cc1 -x hip -triple spirv64-amd-amdhsa -fcuda-is-device \ +// RUN: -emit-llvm %s -o - | FileCheck -check-prefix=AMDGCNSPIRV %s // RUN: %clang_cc1 -x cuda -triple nvptx64-nvidia-cuda- -fcuda-is-device \ // RUN: -emit-llvm %s -o - | FileCheck -check-prefix=NVPTX %s #include "Inputs/cuda.h" @@ -10,6 +12,7 @@ struct A { }; // AMDGCN: define{{.*}} amdgpu_kernel void @_Z6kernel1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}}) +// AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z6kernel1A(%struct.A %{{.+}}) // NVPTX: define{{.*}} void @_Z6kernel1A(ptr noundef byval(%struct.A) align 8 %x) __global__ void kernel(A x) { } @@ -17,6 +20,7 @@ __global__ void kernel(A x) { class Kernel { public: // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}}) + // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %{{.+}}) // NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(ptr noundef byval(%struct.A) align 8 %x) static __global__ void memberKernel(A x){} template static __global__ void templateMemberKernel(T x) {} @@ -31,10 +35,12 @@ void launch(void*); void test() { Kernel K; // AMDGCN: define{{.*}} amdgpu_kernel void @_Z14templateKernelI1AEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}} + // AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z14templateKernelI1AEvT_(%struct.A %{{.+}}) // NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(ptr noundef byval(%struct.A) align 8 %x) launch((void*)templateKernel); // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}} + // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %{{.+}} // NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr noundef byval(%struct.A) align 8 %x) launch((void*)Kernel::templateMemberKernel); } From 757e119809bc5e088eb85118aae028b24e062081 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sun, 11 Aug 2024 02:27:25 +0300 Subject: [PATCH 02/12] Fix formatting error. --- clang/lib/CodeGen/Targets/SPIR.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 1319332635b86..3fca9c52d5c29 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -101,7 +101,7 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const { return ABIArgInfo::getDirect(LTy, 0, nullptr, false); } - if (isAggregateTypeForABI(Ty)) { + if (isAggregateTypeForABI(Ty)) { if (getTarget().getTriple().getVendor() == llvm::Triple::AMD) // TODO: The AMDGPU kernel ABI passes aggregates byref, which is not // currently expressible in SPIR-V; SPIR-V passes aggregates byval, From 13f83ac0870182c0e56ce4fbdc6815da6be256c1 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Mon, 19 Aug 2024 17:45:19 +0100 Subject: [PATCH 03/12] No else after return. --- clang/lib/CodeGen/Targets/SPIR.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 3fca9c52d5c29..cc52925e2e523 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -114,14 +114,13 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const { // once a way to deal with the byref/byval impedance mismatch is // identified. return ABIArgInfo::getDirect(LTy, 0, nullptr, false); - else - // Force copying aggregate type in kernel arguments by value when - // compiling CUDA targeting SPIR-V. This is required for the object - // copied to be valid on the device. - // This behavior follows the CUDA spec - // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing, - // and matches the NVPTX implementation. - return getNaturalAlignIndirect(Ty, /* byval */ true); + // Force copying aggregate type in kernel arguments by value when + // compiling CUDA targeting SPIR-V. This is required for the object + // copied to be valid on the device. + // This behavior follows the CUDA spec + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing, + // and matches the NVPTX implementation. + return getNaturalAlignIndirect(Ty, /* byval */ true); } } return classifyArgumentType(Ty); From daa76c36453b2e133d7d9496ca930d0eaa742fab Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 28 Aug 2024 17:25:29 +0100 Subject: [PATCH 04/12] Re-work SPIR-V support for memory scopes. --- clang/lib/Basic/Targets/SPIR.h | 6 + clang/lib/CodeGen/CGAtomic.cpp | 11 +- clang/lib/CodeGen/Targets/SPIR.cpp | 40 +++ clang/test/CodeGen/scoped-atomic-ops.c | 336 ++++++++++++------ clang/test/Sema/scoped-atomic-ops.c | 1 + llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 3 +- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 39 +- llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 18 + llvm/lib/Target/SPIRV/SPIRVUtils.h | 2 + .../CodeGen/SPIRV/AtomicCompareExchange.ll | 6 +- llvm/test/CodeGen/SPIRV/atomicrmw.ll | 26 +- .../atomicrmw_faddfsub_double.ll | 7 +- .../atomicrmw_faddfsub_float.ll | 7 +- .../atomicrmw_faddfsub_half.ll | 7 +- .../atomicrmw_fminfmax_double.ll | 7 +- .../atomicrmw_fminfmax_float.ll | 7 +- .../atomicrmw_fminfmax_half.ll | 7 +- llvm/test/CodeGen/SPIRV/fence.ll | 10 +- .../CodeGen/SPIRV/instructions/atomic-ptr.ll | 2 +- .../test/CodeGen/SPIRV/instructions/atomic.ll | 33 +- .../SPIRV/instructions/atomic_acqrel.ll | 4 +- .../CodeGen/SPIRV/instructions/atomic_seq.ll | 4 +- llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll | 163 +++++++++ 23 files changed, 543 insertions(+), 203 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h index 37cf9d7921bac..8a26db7971cba 100644 --- a/clang/lib/Basic/Targets/SPIR.h +++ b/clang/lib/Basic/Targets/SPIR.h @@ -335,6 +335,9 @@ class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public BaseSPIRVTargetInfo { PointerWidth = PointerAlign = 32; SizeType = TargetInfo::UnsignedInt; PtrDiffType = IntPtrType = TargetInfo::SignedInt; + // SPIR-V has core support for atomic ops, and Int32 is always available; + // we take the maximum because it's possible the Host supports wider types. + MaxAtomicInlineWidth = std::max(MaxAtomicInlineWidth, 32); resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-" "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"); } @@ -356,6 +359,9 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64TargetInfo : public BaseSPIRVTargetInfo { PointerWidth = PointerAlign = 64; SizeType = TargetInfo::UnsignedLong; PtrDiffType = IntPtrType = TargetInfo::SignedLong; + // SPIR-V has core support for atomic ops, and Int64 is always available; + // we take the maximum because it's possible the Host supports wider types. + MaxAtomicInlineWidth = std::max(MaxAtomicInlineWidth, 64); resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-" "v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"); } diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index fbe9569e50ef6..ba6ee4c0be3b7 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -766,8 +766,17 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, // LLVM atomic instructions always have synch scope. If clang atomic // expression has no scope operand, use default LLVM synch scope. if (!ScopeModel) { + llvm::SyncScope::ID SS = CGF.getLLVMContext().getOrInsertSyncScopeID(""); + if (CGF.getLangOpts().OpenCL) + // OpenCL approach is: "The functions that do not have memory_scope argument + // have the same semantics as the corresponding functions with the + // memory_scope argument set to memory_scope_device." See ref.: // + // https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions + SS = CGF.getTargetHooks().getLLVMSyncScopeID(CGF.getLangOpts(), + SyncScope::OpenCLDevice, + Order, CGF.getLLVMContext()); EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, - Order, CGF.CGM.getLLVMContext().getOrInsertSyncScopeID("")); + Order, SS); return; } diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index cc52925e2e523..a90741c0c0d32 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -58,6 +58,10 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo { SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) : CommonSPIRTargetCodeGenInfo(std::make_unique(CGT)) {} void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; + llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts, + SyncScope Scope, + llvm::AtomicOrdering Ordering, + llvm::LLVMContext &Ctx) const override; }; } // End anonymous namespace. @@ -188,6 +192,42 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention( } } +llvm::SyncScope::ID +SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, + SyncScope Scope, + llvm::AtomicOrdering, + llvm::LLVMContext &Ctx) const { + std::string Name; + switch (Scope) { + case SyncScope::HIPSingleThread: + case SyncScope::SingleScope: + Name = "singlethread"; + break; + case SyncScope::HIPWavefront: + case SyncScope::OpenCLSubGroup: + case SyncScope::WavefrontScope: + Name = "subgroup"; + break; + case SyncScope::HIPWorkgroup: + case SyncScope::OpenCLWorkGroup: + case SyncScope::WorkgroupScope: + Name = "workgroup"; + break; + case SyncScope::HIPAgent: + case SyncScope::OpenCLDevice: + case SyncScope::DeviceScope: + Name = "device"; + break; + case SyncScope::SystemScope: + case SyncScope::HIPSystem: + case SyncScope::OpenCLAllSVMDevices: + Name = "all_svm_devices"; + break; + } + + return Ctx.getOrInsertSyncScopeID(Name); +} + /// Construct a SPIR-V target extension type for the given OpenCL image type. static llvm::Type *getSPIRVImageType(llvm::LLVMContext &Ctx, StringRef BaseType, StringRef OpenCLName, diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c index b0032046639b8..24f1613e8af4e 100644 --- a/clang/test/CodeGen/scoped-atomic-ops.c +++ b/clang/test/CodeGen/scoped-atomic-ops.c @@ -1,12 +1,21 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \ -// RUN: -fvisibility=hidden | FileCheck %s +// RUN: -fvisibility=hidden | FileCheck --check-prefix=AMDGCN %s +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \ +// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s -// CHECK-LABEL: define hidden i32 @fi1a( -// CHECK: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden i32 @fi1a( +// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// SPIRV: define hidden spir_func i32 @fi1a( +// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread") monotonic, align 4 int fi1a(int *i) { int v; __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); @@ -17,13 +26,18 @@ int fi1a(int *i) { return v; } -// CHECK-LABEL: define hidden i32 @fi1b( -// CHECK: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 -// +// AMDGCN-LABEL: define hidden i32 @fi1b( +// AMDGCN: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func i32 @fi1b( +// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4 int fi1b(int *i) { *i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); *i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); @@ -33,13 +47,18 @@ int fi1b(int *i) { return *i; } -// CHECK-LABEL: define hidden void @fi2a( -// CHECK: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 -// +// AMDGCN-LABEL: define hidden void @fi2a( +// AMDGCN: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi2a( +// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4 void fi2a(int *i) { int v = 1; __scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); @@ -49,12 +68,18 @@ void fi2a(int *i) { __scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); } -// CHECK-LABEL: define hidden void @fi2b( -// CHECK: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden void @fi2b( +// AMDGCN: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi2b( +// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread") monotonic, align 4 void fi2b(int *i) { __scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); __scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); @@ -63,15 +88,24 @@ void fi2b(int *i) { __scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); } -// CHECK-LABEL: define hidden void @fi3a( -// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("one-as") monotonic, align 4 -// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden void @fi3a( +// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi3a( +// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("all_svm_devices") monotonic, align 4 void fi3a(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); @@ -83,15 +117,24 @@ void fi3a(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); } -// CHECK-LABEL: define hidden void @fi3b( -// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("agent-one-as") monotonic, align 4 -// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden void @fi3b( +// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("agent-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi3b( +// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("device") monotonic, align 4 +// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("device") monotonic, align 4 void fi3b(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); @@ -103,15 +146,24 @@ void fi3b(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); } -// CHECK-LABEL: define hidden void @fi3c( -// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("workgroup-one-as") monotonic, align 4 -// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden void @fi3c( +// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi3c( +// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("workgroup") monotonic, align 4 +// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("workgroup") monotonic, align 4 void fi3c(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); @@ -123,15 +175,24 @@ void fi3c(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); } -// CHECK-LABEL: define hidden void @fi3d( -// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("wavefront-one-as") monotonic, align 4 -// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden void @fi3d( +// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi3d( +// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("subgroup") monotonic, align 4 +// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("subgroup") monotonic, align 4 void fi3d(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); @@ -143,15 +204,24 @@ void fi3d(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); } -// CHECK-LABEL: define hidden void @fi3e( -// CHECK: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("singlethread-one-as") monotonic, align 4 -// CHECK: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden void @fi3e( +// AMDGCN: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func void @fi3e( +// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("singlethread") monotonic, align 4 +// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("singlethread") monotonic, align 4 void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); @@ -163,8 +233,10 @@ void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); } -// CHECK-LABEL: define hidden zeroext i1 @fi4a( -// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi4a( +// AMDGCN-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4a( +// SPIRV-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("all_svm_devices") acquire acquire, align 4 _Bool fi4a(int *i) { int cmp = 0; int desired = 1; @@ -173,8 +245,10 @@ _Bool fi4a(int *i) { __MEMORY_SCOPE_SYSTEM); } -// CHECK-LABEL: define hidden zeroext i1 @fi4b( -// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi4b( +// AMDGCN-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4b( +// SPIRV-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("device") acquire acquire, align 4 _Bool fi4b(int *i) { int cmp = 0; int desired = 1; @@ -183,8 +257,10 @@ _Bool fi4b(int *i) { __MEMORY_SCOPE_DEVICE); } -// CHECK-LABEL: define hidden zeroext i1 @fi4c( -// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi4c( +// AMDGCN: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4c( +// SPIRV: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup") acquire acquire, align 4 _Bool fi4c(int *i) { int cmp = 0; int desired = 1; @@ -193,8 +269,10 @@ _Bool fi4c(int *i) { __MEMORY_SCOPE_WRKGRP); } -// CHECK-LABEL: define hidden zeroext i1 @fi4d( -// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi4d( +// AMDGCN: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4d( +// SPIRV: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("subgroup") acquire acquire, align 4 _Bool fi4d(int *i) { int cmp = 0; int desired = 1; @@ -203,8 +281,10 @@ _Bool fi4d(int *i) { __MEMORY_SCOPE_WVFRNT); } -// CHECK-LABEL: define hidden zeroext i1 @fi4e( -// CHECK: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi4e( +// AMDGCN: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4e( +// SPIRV: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread") acquire acquire, align 4 _Bool fi4e(int *i) { int cmp = 0; int desired = 1; @@ -213,8 +293,10 @@ _Bool fi4e(int *i) { __MEMORY_SCOPE_SINGLE); } -// CHECK-LABEL: define hidden zeroext i1 @fi5a( -// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi5a( +// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5a( +// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("all_svm_devices") acquire acquire, align 4 _Bool fi5a(int *i) { int cmp = 0; return __scoped_atomic_compare_exchange_n(i, &cmp, 1, 1, __ATOMIC_ACQUIRE, @@ -222,8 +304,10 @@ _Bool fi5a(int *i) { __MEMORY_SCOPE_SYSTEM); } -// CHECK-LABEL: define hidden zeroext i1 @fi5b( -// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi5b( +// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("agent-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5b( +// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("device") acquire acquire, align 4 _Bool fi5b(int *i) { int cmp = 0; return __scoped_atomic_compare_exchange_n(i, &cmp, 1, 1, __ATOMIC_ACQUIRE, @@ -231,101 +315,127 @@ _Bool fi5b(int *i) { __MEMORY_SCOPE_DEVICE); } -// CHECK-LABEL: define hidden zeroext i1 @fi5c( -// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi5c( +// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5c( +// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("workgroup") acquire acquire, align 4 _Bool fi5c(int *i) { int cmp = 0; return __scoped_atomic_compare_exchange_n( i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WRKGRP); } -// CHECK-LABEL: define hidden zeroext i1 @fi5d( -// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi5d( +// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("wavefront-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5d( +// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("subgroup") acquire acquire, align 4 _Bool fi5d(int *i) { int cmp = 0; return __scoped_atomic_compare_exchange_n( i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WVFRNT); } -// CHECK-LABEL: define hidden zeroext i1 @fi5e( -// CHECK: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4 +// AMDGCN-LABEL: define hidden zeroext i1 @fi5e( +// AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread-one-as") acquire acquire, align 4 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5e( +// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("singlethread") acquire acquire, align 4 _Bool fi5e(int *i) { int cmp = 0; return __scoped_atomic_compare_exchange_n( i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SINGLE); } -// CHECK-LABEL: define hidden i32 @fi6a( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden i32 @fi6a( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func i32 @fi6a( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("all_svm_devices") monotonic, align 4 int fi6a(int *c, int *d) { int ret; __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); return ret; } -// CHECK-LABEL: define hidden i32 @fi6b( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden i32 @fi6b( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func i32 @fi6b( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("device") monotonic, align 4 int fi6b(int *c, int *d) { int ret; __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); return ret; } -// CHECK-LABEL: define hidden i32 @fi6c( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden i32 @fi6c( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func i32 @fi6c( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("workgroup") monotonic, align 4 int fi6c(int *c, int *d) { int ret; __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); return ret; } -// CHECK-LABEL: define hidden i32 @fi6d( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden i32 @fi6d( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func i32 @fi6d( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("subgroup") monotonic, align 4 int fi6d(int *c, int *d) { int ret; __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); return ret; } -// CHECK-LABEL: define hidden i32 @fi6e( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// AMDGCN-LABEL: define hidden i32 @fi6e( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 4 +// SPIRV-LABEL: define hidden spir_func i32 @fi6e( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("singlethread") monotonic, align 4 int fi6e(int *c, int *d) { int ret; __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); return ret; } -// CHECK-LABEL: define hidden zeroext i1 @fi7a( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("one-as") monotonic, align 1 +// AMDGCN-LABEL: define hidden zeroext i1 @fi7a( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("one-as") monotonic, align 1 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7a( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("all_svm_devices") monotonic, align 1 _Bool fi7a(_Bool *c) { return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); } -// CHECK-LABEL: define hidden zeroext i1 @fi7b( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 1 +// AMDGCN-LABEL: define hidden zeroext i1 @fi7b( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("agent-one-as") monotonic, align 1 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7b( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("device") monotonic, align 1 _Bool fi7b(_Bool *c) { return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); } -// CHECK-LABEL: define hidden zeroext i1 @fi7c( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 1 +// AMDGCN-LABEL: define hidden zeroext i1 @fi7c( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("workgroup-one-as") monotonic, align 1 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7c( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("workgroup") monotonic, align 1 _Bool fi7c(_Bool *c) { return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); } -// CHECK-LABEL: define hidden zeroext i1 @fi7d( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 1 +// AMDGCN-LABEL: define hidden zeroext i1 @fi7d( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("wavefront-one-as") monotonic, align 1 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7d( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("subgroup") monotonic, align 1 _Bool fi7d(_Bool *c) { return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); } -// CHECK-LABEL: define hidden zeroext i1 @fi7e( -// CHECK: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 1 +// AMDGCN-LABEL: define hidden zeroext i1 @fi7e( +// AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("singlethread-one-as") monotonic, align 1 +// SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7e( +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("singlethread") monotonic, align 1 _Bool fi7e(_Bool *c) { - return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, + return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); } diff --git a/clang/test/Sema/scoped-atomic-ops.c b/clang/test/Sema/scoped-atomic-ops.c index 59e638c646664..33044aa256cb0 100644 --- a/clang/test/Sema/scoped-atomic-ops.c +++ b/clang/test/Sema/scoped-atomic-ops.c @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -x c -triple=amdgcn-amd-amdhsa -verify -fsyntax-only %s // RUN: %clang_cc1 -x c -triple=x86_64-pc-linux-gnu -verify -fsyntax-only %s +// RUN: %clang_cc1 -x c -triple=spirv64-unknown-unknown -verify -fsyntax-only %s int fi1a(int *i) { int v; diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 4175f766ac69a..7b7ccaf7cd765 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -1353,7 +1353,8 @@ Instruction *SPIRVEmitIntrinsics::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { SmallVector Args; for (auto &Op : I.operands()) Args.push_back(Op); - Args.push_back(B.getInt32(I.getSyncScopeID())); + Args.push_back(B.getInt32( + static_cast(getMemScope(I.getContext(), I.getSyncScopeID())))); Args.push_back(B.getInt32( static_cast(getMemSemantics(I.getSuccessOrdering())))); Args.push_back(B.getInt32( diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 9e10d947081cc..8957a8b11e92a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -33,7 +33,8 @@ #include "llvm/Support/Debug.h" namespace { - +// TODO: consider removing this altogether and merely inserting the scopes +// directly in SetupMF. struct SyncScopeIDs { llvm::SyncScope::ID Work_ItemSSID; llvm::SyncScope::ID WorkGroupSSID; @@ -43,11 +44,11 @@ struct SyncScopeIDs { SyncScopeIDs() {} SyncScopeIDs(llvm::LLVMContext &Context) { - Work_ItemSSID = Context.getOrInsertSyncScopeID("work_item"); + Work_ItemSSID = Context.getOrInsertSyncScopeID("singlethread"); WorkGroupSSID = Context.getOrInsertSyncScopeID("workgroup"); DeviceSSID = Context.getOrInsertSyncScopeID("device"); AllSVMDevicesSSID = Context.getOrInsertSyncScopeID("all_svm_devices"); - SubGroupSSID = Context.getOrInsertSyncScopeID("sub_group"); + SubGroupSSID = Context.getOrInsertSyncScopeID("subgroup"); } }; @@ -781,28 +782,6 @@ bool SPIRVInstructionSelector::selectBitcast(Register ResVReg, return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitcast); } -static SPIRV::Scope::Scope getScope(SyncScope::ID Ord, - const SyncScopeIDs &SSIDs) { - if (Ord == SyncScope::SingleThread || Ord == SSIDs.Work_ItemSSID) - return SPIRV::Scope::Invocation; - else if (Ord == SyncScope::System || Ord == SSIDs.DeviceSSID) - return SPIRV::Scope::Device; - else if (Ord == SSIDs.WorkGroupSSID) - return SPIRV::Scope::Workgroup; - else if (Ord == SSIDs.AllSVMDevicesSSID) - return SPIRV::Scope::CrossDevice; - else if (Ord == SSIDs.SubGroupSSID) - return SPIRV::Scope::Subgroup; - else - // OpenCL approach is: "The functions that do not have memory_scope argument - // have the same semantics as the corresponding functions with the - // memory_scope argument set to memory_scope_device." See ref.: // - // https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions - // In our case if the scope is unknown, assuming that SPIR-V code is to be - // consumed in an OpenCL environment, we use the same approach and set the - // scope to memory_scope_device. - return SPIRV::Scope::Device; -} static void addMemoryOperands(MachineMemOperand *MemOp, MachineInstrBuilder &MIB) { @@ -957,7 +936,8 @@ bool SPIRVInstructionSelector::selectAtomicRMW(Register ResVReg, assert(I.hasOneMemOperand()); const MachineMemOperand *MemOp = *I.memoperands_begin(); uint32_t Scope = - static_cast(getScope(MemOp->getSyncScopeID(), SSIDs)); + static_cast(getMemScope(GR.CurMF->getFunction().getContext(), + MemOp->getSyncScopeID())); Register ScopeReg = buildI32Constant(Scope, I); Register Ptr = I.getOperand(1).getReg(); @@ -1028,7 +1008,9 @@ bool SPIRVInstructionSelector::selectFence(MachineInstr &I) const { uint32_t MemSem = static_cast(getMemSemantics(AO)); Register MemSemReg = buildI32Constant(MemSem, I); SyncScope::ID Ord = SyncScope::ID(I.getOperand(1).getImm()); - uint32_t Scope = static_cast(getScope(Ord, SSIDs)); + uint32_t Scope = + static_cast(getMemScope(GR.CurMF->getFunction().getContext(), + Ord)); Register ScopeReg = buildI32Constant(Scope, I); MachineBasicBlock &BB = *I.getParent(); return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpMemoryBarrier)) @@ -1048,7 +1030,8 @@ bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg, assert(I.hasOneMemOperand()); const MachineMemOperand *MemOp = *I.memoperands_begin(); unsigned Scope = - static_cast(getScope(MemOp->getSyncScopeID(), SSIDs)); + static_cast(getMemScope(GR.CurMF->getFunction().getContext(), + MemOp->getSyncScopeID())); ScopeReg = buildI32Constant(Scope, I); unsigned ScSem = static_cast( diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index 927683ad7e32b..15f577f0e1fc3 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -251,6 +251,24 @@ SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord) { llvm_unreachable(nullptr); } +SPIRV::Scope::Scope getMemScope(const LLVMContext &Ctx, SyncScope::ID ID) { + SmallVector SSNs; + Ctx.getSyncScopeNames(SSNs); + + StringRef MemScope = SSNs[ID]; + if (MemScope.empty() || MemScope == "all_svm_devices") + return SPIRV::Scope::CrossDevice; + if (MemScope == "device") + return SPIRV::Scope::Device; + if (MemScope == "workgroup") + return SPIRV::Scope::Workgroup; + if (MemScope == "subgroup") + return SPIRV::Scope::Subgroup; + if (MemScope == "singlethread") + return SPIRV::Scope::Invocation; + return SPIRV::Scope::Device; // Follow OpenCL convention for now. +} + MachineInstr *getDefInstrMaybeConstant(Register &ConstReg, const MachineRegisterInfo *MRI) { MachineInstr *MI = MRI->getVRegDef(ConstReg); diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index c757af6b8aa72..cad94fb36ee49 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -75,6 +75,8 @@ getMemSemanticsForStorageClass(SPIRV::StorageClass::StorageClass SC); SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord); +SPIRV::Scope::Scope getMemScope(const LLVMContext &Ctx, SyncScope::ID ID); + // Find def instruction for the given ConstReg, walking through // spv_track_constant and ASSIGN_TYPE instructions. Updates ConstReg by def // of OpConstant instruction. diff --git a/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll b/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll index 323afec7f35f8..9e19413a15db1 100644 --- a/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll +++ b/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll @@ -1,7 +1,7 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV ; CHECK-SPIRV: %[[#Int:]] = OpTypeInt 32 0 -; CHECK-SPIRV-DAG: %[[#MemScope_Device:]] = OpConstant %[[#Int]] 1 +; CHECK-SPIRV-DAG: %[[#MemScope_AllSvmDevices:]] = OpConstant %[[#Int]] 0 ; CHECK-SPIRV-DAG: %[[#MemSemEqual_SeqCst:]] = OpConstant %[[#Int]] 16 ; CHECK-SPIRV-DAG: %[[#MemSemUnequal_Acquire:]] = OpConstant %[[#Int]] 2 ; CHECK-SPIRV-DAG: %[[#Constant_456:]] = OpConstant %[[#Int]] 456 @@ -11,7 +11,7 @@ ; CHECK-SPIRV-DAG: %[[#UndefStruct:]] = OpUndef %[[#Struct]] ; CHECK-SPIRV: %[[#Value:]] = OpLoad %[[#Int]] %[[#Value_ptr:]] -; CHECK-SPIRV: %[[#Res:]] = OpAtomicCompareExchange %[[#Int]] %[[#Pointer:]] %[[#MemScope_Device]] +; CHECK-SPIRV: %[[#Res:]] = OpAtomicCompareExchange %[[#Int]] %[[#Pointer:]] %[[#MemScope_AllSvmDevices]] ; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Value]] %[[#Comparator:]] ; CHECK-SPIRV: %[[#Success:]] = OpIEqual %[[#]] %[[#Res]] %[[#Comparator]] ; CHECK-SPIRV: %[[#Composite_0:]] = OpCompositeInsert %[[#Struct]] %[[#Res]] %[[#UndefStruct]] 0 @@ -34,7 +34,7 @@ cmpxchg.continue: ; preds = %cmpxchg.store_expec ret void } -; CHECK-SPIRV: %[[#Res_1:]] = OpAtomicCompareExchange %[[#Int]] %[[#Ptr:]] %[[#MemScope_Device]] +; CHECK-SPIRV: %[[#Res_1:]] = OpAtomicCompareExchange %[[#Int]] %[[#Ptr:]] %[[#MemScope_AllSvmDevices]] ; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Constant_456]] %[[#Constant_128]] ; CHECK-SPIRV: %[[#Success_1:]] = OpIEqual %[[#]] %[[#Res_1]] %[[#Constant_128]] ; CHECK-SPIRV: %[[#Composite:]] = OpCompositeInsert %[[#Struct]] %[[#Res_1]] %[[#UndefStruct]] 0 diff --git a/llvm/test/CodeGen/SPIRV/atomicrmw.ll b/llvm/test/CodeGen/SPIRV/atomicrmw.ll index 5f95a974ba671..fa80bc97ab7cd 100644 --- a/llvm/test/CodeGen/SPIRV/atomicrmw.ll +++ b/llvm/test/CodeGen/SPIRV/atomicrmw.ll @@ -5,8 +5,8 @@ ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: %[[#Int:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#Scope_Device:]] = OpConstant %[[#Int]] 1{{$}} -; CHECK-DAG: %[[#MemSem_Relaxed:]] = OpConstant %[[#Int]] 0 +; CHECK-DAG: %[[#Scope_AllSvmDevices:]] = OpConstant %[[#Int]] 0{{$}} +;; %[[#MemSem_Relaxed:]] = %[[#Scope_AllSvmDevices:]] Constant 0 re-used for scope & semantics ; CHECK-DAG: %[[#MemSem_Acquire:]] = OpConstant %[[#Int]] 2 ; CHECK-DAG: %[[#MemSem_Release:]] = OpConstant %[[#Int]] 4{{$}} ; CHECK-DAG: %[[#MemSem_AcquireRelease:]] = OpConstant %[[#Int]] 8 @@ -25,37 +25,37 @@ define dso_local spir_func void @test_atomicrmw() local_unnamed_addr { entry: %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 acq_rel -; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_AcquireRelease]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_AcquireRelease]] %[[#Value]] %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 seq_cst -; CHECK: %[[#]] = OpAtomicExchange %[[#Float]] %[[#FPPointer]] %[[#Scope_Device]] %[[#MemSem_SequentiallyConsistent]] %[[#FPValue]] +; CHECK: %[[#]] = OpAtomicExchange %[[#Float]] %[[#FPPointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_SequentiallyConsistent]] %[[#FPValue]] %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 monotonic -; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Relaxed]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#Scope_AllSvmDevices]] %[[#Value]] %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 acquire -; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Acquire]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_Acquire]] %[[#Value]] %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 release -; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Release]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_Release]] %[[#Value]] %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 acq_rel -; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_AcquireRelease]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_AcquireRelease]] %[[#Value]] %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 seq_cst -; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_SequentiallyConsistent]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_SequentiallyConsistent]] %[[#Value]] %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 monotonic -; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Relaxed]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#Scope_AllSvmDevices]] %[[#Value]] %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 acquire -; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Acquire]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_Acquire]] %[[#Value]] %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 release -; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_Release]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_Release]] %[[#Value]] %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 acq_rel -; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer]] %[[#Scope_Device]] %[[#MemSem_AcquireRelease]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_AcquireRelease]] %[[#Value]] ret void } diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll index 14035a68c81aa..c2ed2f8f62fc8 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_double.ll @@ -10,13 +10,14 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP64]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP64]] 42 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP64Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP64]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP64Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: %[[Const42Neg:[0-9]+]] = OpFNegate %[[TyFP64]] %[[Const42]] -; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42Neg]] +; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42Neg]] ; CHECK: OpAtomicFAddEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll index d34811496e5a1..075e63ea6de61 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_float.ll @@ -10,15 +10,16 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP32]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP32]] 42 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] +; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 ; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[ScopeWorkgroup:[0-9]+]] = OpConstant %[[TyInt32]] 2 -; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 ; CHECK-DAG: %[[WorkgroupMemory:[0-9]+]] = OpConstant %[[TyInt32]] 512 ; CHECK-DAG: %[[TyFP32Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP32]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP32Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: %[[Const42Neg:[0-9]+]] = OpFNegate %[[TyFP32]] %[[Const42]] -; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42Neg]] +; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42Neg]] ; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFAddEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeWorkgroup]] %[[WorkgroupMemory]] %[[Const42]] ; CHECK: %[[Neg42:[0-9]+]] = OpFNegate %[[TyFP32]] %[[Const42]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll index 7da99411ae530..2c938409846d3 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_add/atomicrmw_faddfsub_half.ll @@ -13,13 +13,14 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP16]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP16]] 20800 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP16Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP16]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP16Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: %[[Const42Neg:[0-9]+]] = OpFNegate %[[TyFP16]] %[[Const42]] -; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42Neg]] +; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42Neg]] ; CHECK: OpAtomicFAddEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll index a2d0a594c861d..fdc05f4eac06b 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_double.ll @@ -10,12 +10,13 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP64]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP64]] 42 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP64Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP64]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP64Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFMinEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] -; CHECK: OpAtomicFMaxEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFMinEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFMaxEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFMinEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFMaxEXT %[[TyFP64]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll index 896b7acc1c87b..a7ff448a98b98 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_float.ll @@ -10,12 +10,13 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP32]] 0 ; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[TyFP32]] 42 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP32Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP32]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP32Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFMinEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] -; CHECK: OpAtomicFMaxEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFMinEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] +; CHECK: OpAtomicFMaxEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFMinEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] ; CHECK: OpAtomicFMaxEXT %[[TyFP32]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[Const42]] diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll index b3f48711707a1..d5576d1911a8b 100644 --- a/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_EXT_shader_atomic_float_min_max/atomicrmw_fminfmax_half.ll @@ -10,12 +10,13 @@ ; CHECK-DAG: %[[TyInt32:[0-9]+]] = OpTypeInt 32 0 ; CHECK-DAG: %[[Const0:[0-9]+]] = OpConstant %[[TyFP16]] 0 ; CHECK-DAG: %[[ConstHalf:[0-9]+]] = OpConstant %[[TyFP16]] 20800 -; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 +; CHECK-DAG: %[[ScopeAllSvmDevices:[0-9]+]] = OpConstantNull %[[TyInt32]] ; CHECK-DAG: %[[MemSeqCst:[0-9]+]] = OpConstant %[[TyInt32]] 16 +; CHECK-DAG: %[[ScopeDevice:[0-9]+]] = OpConstant %[[TyInt32]] 1 ; CHECK-DAG: %[[TyFP16Ptr:[0-9]+]] = OpTypePointer {{[a-zA-Z]+}} %[[TyFP16]] ; CHECK-DAG: %[[DblPtr:[0-9]+]] = OpVariable %[[TyFP16Ptr]] {{[a-zA-Z]+}} %[[Const0]] -; CHECK: OpAtomicFMinEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]] -; CHECK: OpAtomicFMaxEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]] +; CHECK: OpAtomicFMinEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstHalf]] +; CHECK: OpAtomicFMaxEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeAllSvmDevices]] %[[MemSeqCst]] %[[ConstHalf]] ; CHECK: OpAtomicFMinEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]] ; CHECK: OpAtomicFMaxEXT %[[TyFP16]] %[[DblPtr]] %[[ScopeDevice]] %[[MemSeqCst]] %[[ConstHalf]] diff --git a/llvm/test/CodeGen/SPIRV/fence.ll b/llvm/test/CodeGen/SPIRV/fence.ll index 5da58667f24f2..c7496c15f2c95 100644 --- a/llvm/test/CodeGen/SPIRV/fence.ll +++ b/llvm/test/CodeGen/SPIRV/fence.ll @@ -3,16 +3,16 @@ ; CHECK-DAG: OpName %[[#GetScope:]] "_Z8getScopev" ; CHECK-DAG: %[[#Long:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#ScopeDevice:]] = OpConstant %[[#Long]] 1 ; CHECK-DAG: %[[#WrkGrpConst2:]] = OpConstant %[[#Long]] 2 -; CHECK-DAG: %[[#Const3:]] = OpConstant %[[#Long]] 3 +; CHECK-DAG: %[[#ScopeAllSvmDevices:]] = OpConstantNull %[[#Long]] ; CHECK-DAG: %[[#InvocationConst4:]] = OpConstant %[[#Long]] 4 ; CHECK-DAG: %[[#Const8:]] = OpConstant %[[#Long]] 8 ; CHECK-DAG: %[[#Const16:]] = OpConstant %[[#Long]] 16 +; CHECK-DAG: %[[#Const3:]] = OpConstant %[[#Long]] 3 ; CHECK-DAG: %[[#Const912:]] = OpConstant %[[#Long]] 912 -; CHECK: OpMemoryBarrier %[[#ScopeDevice]] %[[#WrkGrpConst2]] -; CHECK: OpMemoryBarrier %[[#ScopeDevice]] %[[#InvocationConst4]] -; CHECK: OpMemoryBarrier %[[#ScopeDevice]] %[[#Const8]] +; CHECK: OpMemoryBarrier %[[#ScopeAllSvmDevices]] %[[#WrkGrpConst2]] +; CHECK: OpMemoryBarrier %[[#ScopeAllSvmDevices]] %[[#InvocationConst4]] +; CHECK: OpMemoryBarrier %[[#ScopeAllSvmDevices]] %[[#Const8]] ; CHECK: OpMemoryBarrier %[[#InvocationConst4]] %[[#Const16]] ; CHECK: OpMemoryBarrier %[[#WrkGrpConst2]] %[[#InvocationConst4]] ; CHECK: OpFunctionEnd diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll index 9469d24b20af2..54d0843cbf234 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic-ptr.ll @@ -9,7 +9,7 @@ ; CHECK-DAG: %[[#LongTy:]] = OpTypeInt 64 0 ; CHECK-DAG: %[[#PtrLongTy:]] = OpTypePointer CrossWorkgroup %[[#LongTy]] ; CHECK-DAG: %[[#IntTy:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#Scope:]] = OpConstant %[[#IntTy]] 1 +; CHECK-DAG: %[[#Scope:]] = OpConstantNull %[[#IntTy]] ; CHECK-DAG: %[[#MemSem:]] = OpConstant %[[#IntTy]] 8 ; CHECK-DAG: %[[#PtrPtrLongTy:]] = OpTypePointer CrossWorkgroup %[[#PtrLongTy]] diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll index 8c5c036351d97..724cbc38cb883 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll @@ -18,16 +18,17 @@ ; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]] ; CHECK-DAG: [[I64Ty:%.*]] = OpTypeInt 64 0 ; CHECK-DAG: [[PtrI64Ty:%.*]] = OpTypePointer Generic [[I64Ty]] -;; Device scope is encoded with constant 1 -; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1 +;; AllSvmDevices scope is encoded with constant 0 +; CHECK-DAG: [[SCOPE:%.*]] = OpConstantNull [[I32Ty]] +; CHECK-DAG: [[DEVICESCOPE:%.*]] = OpConstant [[I32Ty]] 1 ;; "monotonic" maps to the relaxed memory semantics, encoded with constant 0 -; CHECK-DAG: [[RELAXED:%.*]] = OpConstantNull [[I32Ty]] +;; [[RELAXED:%.*]] = [[Scope]] ; CHECK: [[ADD]] = OpFunction [[I32Ty]] ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_add(i32* %ptr, i32 %val) { @@ -39,7 +40,7 @@ define i32 @test_add(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_sub(i32* %ptr, i32 %val) { @@ -51,7 +52,7 @@ define i32 @test_sub(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_min(i32* %ptr, i32 %val) { @@ -63,7 +64,7 @@ define i32 @test_min(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_max(i32* %ptr, i32 %val) { @@ -75,7 +76,7 @@ define i32 @test_max(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umin(i32* %ptr, i32 %val) { @@ -87,7 +88,7 @@ define i32 @test_umin(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umax(i32* %ptr, i32 %val) { @@ -99,7 +100,7 @@ define i32 @test_umax(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_and(i32* %ptr, i32 %val) { @@ -111,7 +112,7 @@ define i32 @test_and(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_or(i32* %ptr, i32 %val) { @@ -123,7 +124,7 @@ define i32 @test_or(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_xor(i32* %ptr, i32 %val) { @@ -135,10 +136,10 @@ define i32 @test_xor(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[Arg1:%.*]] = OpFunctionParameter [[PtrI64Ty]] ; CHECK-NEXT: [[Arg2:%.*]] = OpFunctionParameter [[I64Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: OpAtomicSMin [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]] -; CHECK-NEXT: OpAtomicSMax [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]] -; CHECK-NEXT: OpAtomicUMin [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]] -; CHECK-NEXT: OpAtomicUMax [[I64Ty]] [[Arg1]] [[SCOPE]] [[RELAXED]] [[Arg2]] +; CHECK-NEXT: OpAtomicSMin [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] [[SCOPE]] [[Arg2]] +; CHECK-NEXT: OpAtomicSMax [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] [[SCOPE]] [[Arg2]] +; CHECK-NEXT: OpAtomicUMin [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] [[SCOPE]] [[Arg2]] +; CHECK-NEXT: OpAtomicUMax [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] [[SCOPE]] [[Arg2]] ; CHECK-NEXT: OpReturn ; CHECK-NEXT: OpFunctionEnd define dso_local spir_kernel void @test_wrappers(ptr addrspace(4) %arg, i64 %val) { diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll index 07d1a5cf662ec..4d5aca6d404de 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll @@ -13,8 +13,8 @@ ; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0 ; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]] -;; Device scope is encoded with constant 1 -; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1 +;; AllSvmDevices scope is encoded with constant 0 +; CHECK-DAG: [[SCOPE:%.*]] = OpConstantNull [[I32Ty]] ;; "acq_rel" maps to the constant 8 ; CHECK-DAG: [[ACQREL:%.*]] = OpConstant [[I32Ty]] 8 diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll index 4078ffe1a10b8..9fd3d8e95b5f1 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll @@ -13,8 +13,8 @@ ; CHECK-DAG: [[I32Ty:%.*]] = OpTypeInt 32 0 ; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]] -;; Device scope is encoded with constant 1 -; CHECK-DAG: [[SCOPE:%.*]] = OpConstant [[I32Ty]] 1 +;; AllSvmDevices scope is encoded with constant 0 +; CHECK-DAG: [[SCOPE:%.*]] = OpConstantNull [[I32Ty]] ;; "sequentially consistent" maps to constant 16 ; CHECK-DAG: [[SEQ:%.*]] = OpConstant [[I32Ty]] 16 diff --git a/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll b/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll new file mode 100644 index 0000000000000..ca8594a6b68ec --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll @@ -0,0 +1,163 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#Int:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#Float:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#Scope_AllSVMDevices:]] = OpConstant %[[#Int]] 0 +; CHECK-DAG: %[[#Value:]] = OpConstant %[[#Int]] 42 +; CHECK-DAG: %[[#FPValue:]] = OpConstant %[[#Float]] 42 +; CHECK-DAG: %[[#Scope_Invocation:]] = OpConstant %[[#Int]] 4 +; CHECK-DAG: %[[#MemSem_SeqCst:]] = OpConstant %[[#Int]] 16 +; CHECK-DAG: %[[#Scope_Subgroup:]] = OpConstant %[[#Int]] 3 +; CHECK-DAG: %[[#Scope_Workgroup:]] = OpConstant %[[#Int]] 2 +; CHECK-DAG: %[[#Scope_Device:]] = OpConstant %[[#Int]] 1 +; CHECK-DAG: %[[#PointerType:]] = OpTypePointer CrossWorkgroup %[[#Int]] +; CHECK-DAG: %[[#FPPointerType:]] = OpTypePointer CrossWorkgroup %[[#Float]] +; CHECK-DAG: %[[#Pointer:]] = OpVariable %[[#PointerType]] CrossWorkgroup +; CHECK-DAG: %[[#FPPointer:]] = OpVariable %[[#FPPointerType]] CrossWorkgroup + +@ui = common dso_local addrspace(1) global i32 0, align 4 +@f = common dso_local local_unnamed_addr addrspace(1) global float 0.000000e+00, align 4 + +define dso_local spir_func void @test_singlethread_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("singlethread") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Invocation:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} + +define dso_local spir_func void @test_subgroup_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("subgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Subgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} + +define dso_local spir_func void @test_workgroup_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("workgroup") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Workgroup:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} + +define dso_local spir_func void @test_device_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("device") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_Device:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} + +define dso_local spir_func void @test_all_svm_devices_atomicrmw() local_unnamed_addr { +entry: + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("all_svm_devices") seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + + ret void +} From 25378a75752c134be0cef4999870819ba8ff8da1 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 28 Aug 2024 19:28:55 +0100 Subject: [PATCH 05/12] Fix formatting. --- clang/lib/CodeGen/CGAtomic.cpp | 6 +++--- clang/lib/CodeGen/Targets/SPIR.cpp | 3 +-- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 16 ++++++---------- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index ba6ee4c0be3b7..86f861c8d9a4e 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -768,9 +768,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, if (!ScopeModel) { llvm::SyncScope::ID SS = CGF.getLLVMContext().getOrInsertSyncScopeID(""); if (CGF.getLangOpts().OpenCL) - // OpenCL approach is: "The functions that do not have memory_scope argument - // have the same semantics as the corresponding functions with the - // memory_scope argument set to memory_scope_device." See ref.: // + // OpenCL approach is: "The functions that do not have memory_scope + // argument have the same semantics as the corresponding functions with + // the memory_scope argument set to memory_scope_device." See ref.: // https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#atomic-functions SS = CGF.getTargetHooks().getLLVMSyncScopeID(CGF.getLangOpts(), SyncScope::OpenCLDevice, diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index a90741c0c0d32..f150953eeac25 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -193,8 +193,7 @@ void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention( } llvm::SyncScope::ID -SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, - SyncScope Scope, +SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope, llvm::AtomicOrdering, llvm::LLVMContext &Ctx) const { std::string Name; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index 8957a8b11e92a..17b39842de6cb 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -782,7 +782,6 @@ bool SPIRVInstructionSelector::selectBitcast(Register ResVReg, return selectUnOp(ResVReg, ResType, I, SPIRV::OpBitcast); } - static void addMemoryOperands(MachineMemOperand *MemOp, MachineInstrBuilder &MIB) { uint32_t SpvMemOp = static_cast(SPIRV::MemoryOperand::None); @@ -935,9 +934,8 @@ bool SPIRVInstructionSelector::selectAtomicRMW(Register ResVReg, unsigned NegateOpcode) const { assert(I.hasOneMemOperand()); const MachineMemOperand *MemOp = *I.memoperands_begin(); - uint32_t Scope = - static_cast(getMemScope(GR.CurMF->getFunction().getContext(), - MemOp->getSyncScopeID())); + uint32_t Scope = static_cast(getMemScope( + GR.CurMF->getFunction().getContext(), MemOp->getSyncScopeID())); Register ScopeReg = buildI32Constant(Scope, I); Register Ptr = I.getOperand(1).getReg(); @@ -1008,9 +1006,8 @@ bool SPIRVInstructionSelector::selectFence(MachineInstr &I) const { uint32_t MemSem = static_cast(getMemSemantics(AO)); Register MemSemReg = buildI32Constant(MemSem, I); SyncScope::ID Ord = SyncScope::ID(I.getOperand(1).getImm()); - uint32_t Scope = - static_cast(getMemScope(GR.CurMF->getFunction().getContext(), - Ord)); + uint32_t Scope = static_cast( + getMemScope(GR.CurMF->getFunction().getContext(), Ord)); Register ScopeReg = buildI32Constant(Scope, I); MachineBasicBlock &BB = *I.getParent(); return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpMemoryBarrier)) @@ -1029,9 +1026,8 @@ bool SPIRVInstructionSelector::selectAtomicCmpXchg(Register ResVReg, if (!isa(I)) { assert(I.hasOneMemOperand()); const MachineMemOperand *MemOp = *I.memoperands_begin(); - unsigned Scope = - static_cast(getMemScope(GR.CurMF->getFunction().getContext(), - MemOp->getSyncScopeID())); + unsigned Scope = static_cast(getMemScope( + GR.CurMF->getFunction().getContext(), MemOp->getSyncScopeID())); ScopeReg = buildI32Constant(Scope, I); unsigned ScSem = static_cast( From 79acf40bb2f053ab41728cf5d4ab514346254c69 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 4 Sep 2024 13:52:40 +0100 Subject: [PATCH 06/12] Incorporate review feedback. --- clang/lib/CodeGen/CGAtomic.cpp | 4 ++- clang/lib/CodeGen/Targets/SPIR.cpp | 54 ++++++++++++++---------------- 2 files changed, 28 insertions(+), 30 deletions(-) diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index 86f861c8d9a4e..45b01fdab4bfd 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -766,7 +766,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, // LLVM atomic instructions always have synch scope. If clang atomic // expression has no scope operand, use default LLVM synch scope. if (!ScopeModel) { - llvm::SyncScope::ID SS = CGF.getLLVMContext().getOrInsertSyncScopeID(""); + llvm::SyncScope::ID SS; if (CGF.getLangOpts().OpenCL) // OpenCL approach is: "The functions that do not have memory_scope // argument have the same semantics as the corresponding functions with @@ -775,6 +775,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, SS = CGF.getTargetHooks().getLLVMSyncScopeID(CGF.getLangOpts(), SyncScope::OpenCLDevice, Order, CGF.getLLVMContext()); + else + SS = CGF.getLLVMContext().getOrInsertSyncScopeID(""); EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, Order, SS); return; diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index f150953eeac25..f7ff78d394e6c 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -63,6 +63,30 @@ class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo { llvm::AtomicOrdering Ordering, llvm::LLVMContext &Ctx) const override; }; + +inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) { + switch (Scope) { + case SyncScope::HIPSingleThread: + case SyncScope::SingleScope: + return "singlethread"; + case SyncScope::HIPWavefront: + case SyncScope::OpenCLSubGroup: + case SyncScope::WavefrontScope: + return "subgroup"; + case SyncScope::HIPWorkgroup: + case SyncScope::OpenCLWorkGroup: + case SyncScope::WorkgroupScope: + return "workgroup"; + case SyncScope::HIPAgent: + case SyncScope::OpenCLDevice: + case SyncScope::DeviceScope: + return "device"; + case SyncScope::SystemScope: + case SyncScope::HIPSystem: + case SyncScope::OpenCLAllSVMDevices: + return "all_svm_devices"; + } +} } // End anonymous namespace. void CommonSPIRABIInfo::setCCs() { @@ -196,35 +220,7 @@ llvm::SyncScope::ID SPIRVTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &, SyncScope Scope, llvm::AtomicOrdering, llvm::LLVMContext &Ctx) const { - std::string Name; - switch (Scope) { - case SyncScope::HIPSingleThread: - case SyncScope::SingleScope: - Name = "singlethread"; - break; - case SyncScope::HIPWavefront: - case SyncScope::OpenCLSubGroup: - case SyncScope::WavefrontScope: - Name = "subgroup"; - break; - case SyncScope::HIPWorkgroup: - case SyncScope::OpenCLWorkGroup: - case SyncScope::WorkgroupScope: - Name = "workgroup"; - break; - case SyncScope::HIPAgent: - case SyncScope::OpenCLDevice: - case SyncScope::DeviceScope: - Name = "device"; - break; - case SyncScope::SystemScope: - case SyncScope::HIPSystem: - case SyncScope::OpenCLAllSVMDevices: - Name = "all_svm_devices"; - break; - } - - return Ctx.getOrInsertSyncScopeID(Name); + return Ctx.getOrInsertSyncScopeID(mapClangSyncScopeToLLVM(Scope)); } /// Construct a SPIR-V target extension type for the given OpenCL image type. From e984939b1c5364b9bf7b09f2282c5503752bddf4 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 18 Sep 2024 20:13:59 +0100 Subject: [PATCH 07/12] No need for aliases / special handling of System scope. --- clang/lib/CodeGen/CGAtomic.cpp | 2 +- clang/lib/CodeGen/Targets/SPIR.cpp | 2 +- clang/test/CodeGen/scoped-atomic-ops.c | 32 +++++++++++++------------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index 45b01fdab4bfd..a2a87e012b8b2 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -776,7 +776,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest, SyncScope::OpenCLDevice, Order, CGF.getLLVMContext()); else - SS = CGF.getLLVMContext().getOrInsertSyncScopeID(""); + SS = llvm::SyncScope::System; EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size, Order, SS); return; diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index f7ff78d394e6c..764617acb8ba6 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -84,7 +84,7 @@ inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) { case SyncScope::SystemScope: case SyncScope::HIPSystem: case SyncScope::OpenCLAllSVMDevices: - return "all_svm_devices"; + return ""; } } } // End anonymous namespace. diff --git a/clang/test/CodeGen/scoped-atomic-ops.c b/clang/test/CodeGen/scoped-atomic-ops.c index 24f1613e8af4e..cf98812a07e91 100644 --- a/clang/test/CodeGen/scoped-atomic-ops.c +++ b/clang/test/CodeGen/scoped-atomic-ops.c @@ -11,7 +11,7 @@ // AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("wavefront-one-as") monotonic, align 4 // AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:.+]] syncscope("singlethread-one-as") monotonic, align 4 // SPIRV: define hidden spir_func i32 @fi1a( -// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:.+]] monotonic, align 4 // SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:.+]] syncscope("device") monotonic, align 4 // SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:.+]] syncscope("workgroup") monotonic, align 4 // SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:.+]] syncscope("subgroup") monotonic, align 4 @@ -33,7 +33,7 @@ int fi1a(int *i) { // AMDGCN: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 // AMDGCN: [[TMP4:%.*]] = load atomic i32, ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 // SPIRV-LABEL: define hidden spir_func i32 @fi1b( -// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP0:%.*]] = load atomic i32, ptr [[PTR0:%.+]] monotonic, align 4 // SPIRV: [[TMP1:%.*]] = load atomic i32, ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4 // SPIRV: [[TMP2:%.*]] = load atomic i32, ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4 // SPIRV: [[TMP3:%.*]] = load atomic i32, ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4 @@ -54,7 +54,7 @@ int fi1b(int *i) { // AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 // AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 // SPIRV-LABEL: define hidden spir_func void @fi2a( -// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] monotonic, align 4 // SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4 // SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4 // SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4 @@ -75,7 +75,7 @@ void fi2a(int *i) { // AMDGCN: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("wavefront-one-as") monotonic, align 4 // AMDGCN: store atomic i32 [[TMP4:%.+]], ptr [[PTR4:%.+]] syncscope("singlethread-one-as") monotonic, align 4 // SPIRV-LABEL: define hidden spir_func void @fi2b( -// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: store atomic i32 [[TMP0:%.+]], ptr [[PTR0:%.+]] monotonic, align 4 // SPIRV: store atomic i32 [[TMP1:%.+]], ptr [[PTR1:%.+]] syncscope("device") monotonic, align 4 // SPIRV: store atomic i32 [[TMP2:%.+]], ptr [[PTR2:%.+]] syncscope("workgroup") monotonic, align 4 // SPIRV: store atomic i32 [[TMP3:%.+]], ptr [[PTR3:%.+]] syncscope("subgroup") monotonic, align 4 @@ -98,14 +98,14 @@ void fi2b(int *i) { // AMDGCN: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("one-as") monotonic, align 4 // AMDGCN: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("one-as") monotonic, align 4 // SPIRV-LABEL: define hidden spir_func void @fi3a( -// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("all_svm_devices") monotonic, align 4 -// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] syncscope("all_svm_devices") monotonic, align 4 -// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] syncscope("all_svm_devices") monotonic, align 4 -// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] syncscope("all_svm_devices") monotonic, align 4 -// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] syncscope("all_svm_devices") monotonic, align 4 -// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] syncscope("all_svm_devices") monotonic, align 4 -// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] syncscope("all_svm_devices") monotonic, align 4 -// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP0:%.*]] = atomicrmw add ptr [[PTR0:%.+]], i32 [[VAL0:.+]] monotonic, align 4 +// SPIRV: [[TMP1:%.*]] = atomicrmw sub ptr [[PTR1:%.+]], i32 [[VAL1:.+]] monotonic, align 4 +// SPIRV: [[TMP2:%.*]] = atomicrmw and ptr [[PTR2:%.+]], i32 [[VAL2:.+]] monotonic, align 4 +// SPIRV: [[TMP3:%.*]] = atomicrmw or ptr [[PTR3:%.+]], i32 [[VAL3:.+]] monotonic, align 4 +// SPIRV: [[TMP4:%.*]] = atomicrmw xor ptr [[PTR4:%.+]], i32 [[VAL4:.+]] monotonic, align 4 +// SPIRV: [[TMP5:%.*]] = atomicrmw nand ptr [[PTR5:%.+]], i32 [[VAL5:.+]] monotonic, align 4 +// SPIRV: [[TMP6:%.*]] = atomicrmw min ptr [[PTR6:%.+]], i32 [[VAL6:.+]] monotonic, align 4 +// SPIRV: [[TMP7:%.*]] = atomicrmw max ptr [[PTR7:%.+]], i32 [[VAL7:.+]] monotonic, align 4 void fi3a(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); @@ -236,7 +236,7 @@ void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { // AMDGCN-LABEL: define hidden zeroext i1 @fi4a( // AMDGCN-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4 // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4a( -// SPIRV-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("all_svm_devices") acquire acquire, align 4 +// SPIRV-DAG: [[TMP0:%.*]] = cmpxchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] acquire acquire, align 4 _Bool fi4a(int *i) { int cmp = 0; int desired = 1; @@ -296,7 +296,7 @@ _Bool fi4e(int *i) { // AMDGCN-LABEL: define hidden zeroext i1 @fi5a( // AMDGCN: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("one-as") acquire acquire, align 4 // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5a( -// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] syncscope("all_svm_devices") acquire acquire, align 4 +// SPIRV: [[TMP0:%.*]] = cmpxchg weak ptr [[PTR0:%.+]], i32 [[VAL0:.+]], i32 [[VAL1:.+]] acquire acquire, align 4 _Bool fi5a(int *i) { int cmp = 0; return __scoped_atomic_compare_exchange_n(i, &cmp, 1, 1, __ATOMIC_ACQUIRE, @@ -348,7 +348,7 @@ _Bool fi5e(int *i) { // AMDGCN-LABEL: define hidden i32 @fi6a( // AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("one-as") monotonic, align 4 // SPIRV-LABEL: define hidden spir_func i32 @fi6a( -// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] syncscope("all_svm_devices") monotonic, align 4 +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i32 [[VAL0:.+]] monotonic, align 4 int fi6a(int *c, int *d) { int ret; __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); @@ -398,7 +398,7 @@ int fi6e(int *c, int *d) { // AMDGCN-LABEL: define hidden zeroext i1 @fi7a( // AMDGCN: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("one-as") monotonic, align 1 // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7a( -// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] syncscope("all_svm_devices") monotonic, align 1 +// SPIRV: [[TMP0:%.*]] = atomicrmw xchg ptr [[PTR0:%.+]], i8 [[VAL0:.+]] monotonic, align 1 _Bool fi7a(_Bool *c) { return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); From ced6877031ae7b0de11b7be147f4ea988caaf687 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 18 Sep 2024 21:47:53 +0100 Subject: [PATCH 08/12] Remove & replace SyncScopeIDs struct. --- .../Target/SPIRV/SPIRVInstructionSelector.cpp | 24 ------------ llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 38 ++++++++++++------- llvm/lib/Target/SPIRV/SPIRVUtils.h | 2 +- 3 files changed, 25 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index faa05baf5c252..7c71a18a9bf81 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -33,28 +33,6 @@ #include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/Support/Debug.h" -namespace { -// TODO: consider removing this altogether and merely inserting the scopes -// directly in SetupMF. -struct SyncScopeIDs { - llvm::SyncScope::ID Work_ItemSSID; - llvm::SyncScope::ID WorkGroupSSID; - llvm::SyncScope::ID DeviceSSID; - llvm::SyncScope::ID AllSVMDevicesSSID; - llvm::SyncScope::ID SubGroupSSID; - - SyncScopeIDs() {} - SyncScopeIDs(llvm::LLVMContext &Context) { - Work_ItemSSID = Context.getOrInsertSyncScopeID("singlethread"); - WorkGroupSSID = Context.getOrInsertSyncScopeID("workgroup"); - DeviceSSID = Context.getOrInsertSyncScopeID("device"); - AllSVMDevicesSSID = Context.getOrInsertSyncScopeID("all_svm_devices"); - SubGroupSSID = Context.getOrInsertSyncScopeID("subgroup"); - } -}; - -} // namespace - #define DEBUG_TYPE "spirv-isel" using namespace llvm; @@ -77,7 +55,6 @@ class SPIRVInstructionSelector : public InstructionSelector { const RegisterBankInfo &RBI; SPIRVGlobalRegistry &GR; MachineRegisterInfo *MRI; - SyncScopeIDs SSIDs; MachineFunction *HasVRegsReset = nullptr; /// We need to keep track of the number we give to anonymous global values to @@ -306,7 +283,6 @@ void SPIRVInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB, CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { - SSIDs = SyncScopeIDs(MF.getFunction().getContext()); MRI = &MF.getRegInfo(); GR.setCurrentFunc(MF); InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI); diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index 15f577f0e1fc3..2b20960653f92 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -251,22 +251,32 @@ SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord) { llvm_unreachable(nullptr); } -SPIRV::Scope::Scope getMemScope(const LLVMContext &Ctx, SyncScope::ID ID) { - SmallVector SSNs; - Ctx.getSyncScopeNames(SSNs); - - StringRef MemScope = SSNs[ID]; - if (MemScope.empty() || MemScope == "all_svm_devices") +SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id) { + static const struct { + // Named by + // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_scope_id. + // We don't need aliases for Invocation and CrossDevice, as we already have + // them covered by "singlethread" and "" strings respectively (see + // implementation of LLVMContext::LLVMContext()). + llvm::SyncScope::ID SubGroupSSID = + Ctx.getOrInsertSyncScopeID("subgroup"); + llvm::SyncScope::ID WorkGroupSSID = + Ctx.getOrInsertSyncScopeID("workgroup"); + llvm::SyncScope::ID DeviceSSID = + Ctx.getOrInsertSyncScopeID("device"); + } SSIDs{}; + + if (Id == llvm::SyncScope::SingleThread) + return SPIRV::Scope::Invocation; + else if (Id == llvm::SyncScope::System) return SPIRV::Scope::CrossDevice; - if (MemScope == "device") - return SPIRV::Scope::Device; - if (MemScope == "workgroup") - return SPIRV::Scope::Workgroup; - if (MemScope == "subgroup") + else if (Id == SSIDs.SubGroupSSID) return SPIRV::Scope::Subgroup; - if (MemScope == "singlethread") - return SPIRV::Scope::Invocation; - return SPIRV::Scope::Device; // Follow OpenCL convention for now. + else if (Id == SSIDs.WorkGroupSSID) + return SPIRV::Scope::Workgroup; + else if (Id == SSIDs.DeviceSSID) + return SPIRV::Scope::Device; + return SPIRV::Scope::CrossDevice; } MachineInstr *getDefInstrMaybeConstant(Register &ConstReg, diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index cad94fb36ee49..3a291cf9f6d54 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -75,7 +75,7 @@ getMemSemanticsForStorageClass(SPIRV::StorageClass::StorageClass SC); SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord); -SPIRV::Scope::Scope getMemScope(const LLVMContext &Ctx, SyncScope::ID ID); +SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id); // Find def instruction for the given ConstReg, walking through // spv_track_constant and ASSIGN_TYPE instructions. Updates ConstReg by def From ec0eb50fd03a08976b9d6152741d0d3ed3190c28 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 18 Sep 2024 21:54:43 +0100 Subject: [PATCH 09/12] Fix formatting. --- llvm/lib/Target/SPIRV/SPIRVUtils.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index 2b20960653f92..8d8fcf9cd2083 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -258,23 +258,20 @@ SPIRV::Scope::Scope getMemScope(LLVMContext &Ctx, SyncScope::ID Id) { // We don't need aliases for Invocation and CrossDevice, as we already have // them covered by "singlethread" and "" strings respectively (see // implementation of LLVMContext::LLVMContext()). - llvm::SyncScope::ID SubGroupSSID = - Ctx.getOrInsertSyncScopeID("subgroup"); - llvm::SyncScope::ID WorkGroupSSID = - Ctx.getOrInsertSyncScopeID("workgroup"); - llvm::SyncScope::ID DeviceSSID = - Ctx.getOrInsertSyncScopeID("device"); + llvm::SyncScope::ID SubGroup = Ctx.getOrInsertSyncScopeID("subgroup"); + llvm::SyncScope::ID WorkGroup = Ctx.getOrInsertSyncScopeID("workgroup"); + llvm::SyncScope::ID Device = Ctx.getOrInsertSyncScopeID("device"); } SSIDs{}; if (Id == llvm::SyncScope::SingleThread) return SPIRV::Scope::Invocation; else if (Id == llvm::SyncScope::System) return SPIRV::Scope::CrossDevice; - else if (Id == SSIDs.SubGroupSSID) + else if (Id == SSIDs.SubGroup) return SPIRV::Scope::Subgroup; - else if (Id == SSIDs.WorkGroupSSID) + else if (Id == SSIDs.WorkGroup) return SPIRV::Scope::Workgroup; - else if (Id == SSIDs.DeviceSSID) + else if (Id == SSIDs.Device) return SPIRV::Scope::Device; return SPIRV::Scope::CrossDevice; } From e2f72fb162a0efc28e06cd208ae36c416dbc9e36 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 20 Sep 2024 15:09:20 +0100 Subject: [PATCH 10/12] Incorporate review feedback. --- clang/lib/CodeGen/Targets/SPIR.cpp | 1 + .../CodeGen/SPIRV/AtomicCompareExchange.ll | 6 +-- llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll | 46 +++++++++---------- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 764617acb8ba6..d5e8e4f7a5916 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -86,6 +86,7 @@ inline StringRef mapClangSyncScopeToLLVM(SyncScope Scope) { case SyncScope::OpenCLAllSVMDevices: return ""; } + return ""; } } // End anonymous namespace. diff --git a/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll b/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll index 9e19413a15db1..f8207c56a5656 100644 --- a/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll +++ b/llvm/test/CodeGen/SPIRV/AtomicCompareExchange.ll @@ -1,7 +1,7 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV ; CHECK-SPIRV: %[[#Int:]] = OpTypeInt 32 0 -; CHECK-SPIRV-DAG: %[[#MemScope_AllSvmDevices:]] = OpConstant %[[#Int]] 0 +; CHECK-SPIRV-DAG: %[[#MemScope_CrossDevice:]] = OpConstant %[[#Int]] 0 ; CHECK-SPIRV-DAG: %[[#MemSemEqual_SeqCst:]] = OpConstant %[[#Int]] 16 ; CHECK-SPIRV-DAG: %[[#MemSemUnequal_Acquire:]] = OpConstant %[[#Int]] 2 ; CHECK-SPIRV-DAG: %[[#Constant_456:]] = OpConstant %[[#Int]] 456 @@ -11,7 +11,7 @@ ; CHECK-SPIRV-DAG: %[[#UndefStruct:]] = OpUndef %[[#Struct]] ; CHECK-SPIRV: %[[#Value:]] = OpLoad %[[#Int]] %[[#Value_ptr:]] -; CHECK-SPIRV: %[[#Res:]] = OpAtomicCompareExchange %[[#Int]] %[[#Pointer:]] %[[#MemScope_AllSvmDevices]] +; CHECK-SPIRV: %[[#Res:]] = OpAtomicCompareExchange %[[#Int]] %[[#Pointer:]] %[[#MemScope_CrossDevice]] ; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Value]] %[[#Comparator:]] ; CHECK-SPIRV: %[[#Success:]] = OpIEqual %[[#]] %[[#Res]] %[[#Comparator]] ; CHECK-SPIRV: %[[#Composite_0:]] = OpCompositeInsert %[[#Struct]] %[[#Res]] %[[#UndefStruct]] 0 @@ -34,7 +34,7 @@ cmpxchg.continue: ; preds = %cmpxchg.store_expec ret void } -; CHECK-SPIRV: %[[#Res_1:]] = OpAtomicCompareExchange %[[#Int]] %[[#Ptr:]] %[[#MemScope_AllSvmDevices]] +; CHECK-SPIRV: %[[#Res_1:]] = OpAtomicCompareExchange %[[#Int]] %[[#Ptr:]] %[[#MemScope_CrossDevice]] ; CHECK-SPIRV-SAME: %[[#MemSemEqual_SeqCst]] %[[#MemSemUnequal_Acquire]] %[[#Constant_456]] %[[#Constant_128]] ; CHECK-SPIRV: %[[#Success_1:]] = OpIEqual %[[#]] %[[#Res_1]] %[[#Constant_128]] ; CHECK-SPIRV: %[[#Composite:]] = OpCompositeInsert %[[#Struct]] %[[#Res_1]] %[[#UndefStruct]] 0 diff --git a/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll b/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll index ca8594a6b68ec..130db18534832 100644 --- a/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll +++ b/llvm/test/CodeGen/SPIRV/scoped_atomicrmw.ll @@ -6,7 +6,7 @@ ; CHECK: %[[#Int:]] = OpTypeInt 32 0 ; CHECK-DAG: %[[#Float:]] = OpTypeFloat 32 -; CHECK-DAG: %[[#Scope_AllSVMDevices:]] = OpConstant %[[#Int]] 0 +; CHECK-DAG: %[[#Scope_CrossDevice:]] = OpConstant %[[#Int]] 0 ; CHECK-DAG: %[[#Value:]] = OpConstant %[[#Int]] 42 ; CHECK-DAG: %[[#FPValue:]] = OpConstant %[[#Float]] 42 ; CHECK-DAG: %[[#Scope_Invocation:]] = OpConstant %[[#Int]] 4 @@ -136,28 +136,28 @@ entry: define dso_local spir_func void @test_all_svm_devices_atomicrmw() local_unnamed_addr { entry: - %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst - ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] - %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 syncscope("all_svm_devices") seq_cst - ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] - %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst - ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] - %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst - ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] - %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst - ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] - %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst - ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] - %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst - ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] - %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst - ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] - %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst - ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] - %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst - ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] - %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 syncscope("all_svm_devices") seq_cst - ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_AllSvmDevices:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 seq_cst + ; CHECK: %[[#]] = OpAtomicExchange %[[#Float:]] %[[#FPPointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#FPValue:]] + %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] + %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 seq_cst + ; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer:]] %[[#Scope_CrossDevice:]] %[[#MemSem_SeqCst:]] %[[#Value:]] ret void } From 96a79e7c23d85097291c955bce65faf9b1ffde99 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Fri, 20 Sep 2024 16:12:08 +0100 Subject: [PATCH 11/12] Try to work around confusing Scope re-use. --- llvm/test/CodeGen/SPIRV/atomicrmw.ll | 25 ++++++++-------- .../test/CodeGen/SPIRV/instructions/atomic.ll | 30 +++++++++---------- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/llvm/test/CodeGen/SPIRV/atomicrmw.ll b/llvm/test/CodeGen/SPIRV/atomicrmw.ll index fa80bc97ab7cd..07576056117cb 100644 --- a/llvm/test/CodeGen/SPIRV/atomicrmw.ll +++ b/llvm/test/CodeGen/SPIRV/atomicrmw.ll @@ -5,8 +5,7 @@ ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: %[[#Int:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#Scope_AllSvmDevices:]] = OpConstant %[[#Int]] 0{{$}} -;; %[[#MemSem_Relaxed:]] = %[[#Scope_AllSvmDevices:]] Constant 0 re-used for scope & semantics +; CHECK-DAG: %[[#Scope_CrossDevice:]] = OpConstant %[[#Int]] 0{{$}} ; CHECK-DAG: %[[#MemSem_Acquire:]] = OpConstant %[[#Int]] 2 ; CHECK-DAG: %[[#MemSem_Release:]] = OpConstant %[[#Int]] 4{{$}} ; CHECK-DAG: %[[#MemSem_AcquireRelease:]] = OpConstant %[[#Int]] 8 @@ -25,37 +24,37 @@ define dso_local spir_func void @test_atomicrmw() local_unnamed_addr { entry: %0 = atomicrmw xchg i32 addrspace(1)* @ui, i32 42 acq_rel -; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_AcquireRelease]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicExchange %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_AcquireRelease]] %[[#Value]] %1 = atomicrmw xchg float addrspace(1)* @f, float 42.000000e+00 seq_cst -; CHECK: %[[#]] = OpAtomicExchange %[[#Float]] %[[#FPPointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_SequentiallyConsistent]] %[[#FPValue]] +; CHECK: %[[#]] = OpAtomicExchange %[[#Float]] %[[#FPPointer]] %[[#Scope_CrossDevice]] %[[#MemSem_SequentiallyConsistent]] %[[#FPValue]] %2 = atomicrmw add i32 addrspace(1)* @ui, i32 42 monotonic -; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#Scope_AllSvmDevices]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicIAdd %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %{{.+}} %[[#Value]] %3 = atomicrmw sub i32 addrspace(1)* @ui, i32 42 acquire -; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_Acquire]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicISub %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_Acquire]] %[[#Value]] %4 = atomicrmw or i32 addrspace(1)* @ui, i32 42 release -; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_Release]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicOr %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_Release]] %[[#Value]] %5 = atomicrmw xor i32 addrspace(1)* @ui, i32 42 acq_rel -; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_AcquireRelease]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicXor %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_AcquireRelease]] %[[#Value]] %6 = atomicrmw and i32 addrspace(1)* @ui, i32 42 seq_cst -; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_SequentiallyConsistent]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicAnd %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_SequentiallyConsistent]] %[[#Value]] %7 = atomicrmw max i32 addrspace(1)* @ui, i32 42 monotonic -; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#Scope_AllSvmDevices]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicSMax %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %{{.*}} %[[#Value]] %8 = atomicrmw min i32 addrspace(1)* @ui, i32 42 acquire -; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_Acquire]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicSMin %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_Acquire]] %[[#Value]] %9 = atomicrmw umax i32 addrspace(1)* @ui, i32 42 release -; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_Release]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicUMax %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_Release]] %[[#Value]] %10 = atomicrmw umin i32 addrspace(1)* @ui, i32 42 acq_rel -; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer]] %[[#Scope_AllSvmDevices]] %[[#MemSem_AcquireRelease]] %[[#Value]] +; CHECK: %[[#]] = OpAtomicUMin %[[#Int]] %[[#Pointer]] %[[#Scope_CrossDevice]] %[[#MemSem_AcquireRelease]] %[[#Value]] ret void } diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll index 724cbc38cb883..f4e7b128f77a3 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll @@ -18,17 +18,15 @@ ; CHECK-DAG: [[PtrI32Ty:%.*]] = OpTypePointer Function [[I32Ty]] ; CHECK-DAG: [[I64Ty:%.*]] = OpTypeInt 64 0 ; CHECK-DAG: [[PtrI64Ty:%.*]] = OpTypePointer Generic [[I64Ty]] -;; AllSvmDevices scope is encoded with constant 0 -; CHECK-DAG: [[SCOPE:%.*]] = OpConstantNull [[I32Ty]] +; CHECK-DAG: [[CROSSDEVICESCOPE:%.*]] = OpConstantNull [[I32Ty]] ; CHECK-DAG: [[DEVICESCOPE:%.*]] = OpConstant [[I32Ty]] 1 ;; "monotonic" maps to the relaxed memory semantics, encoded with constant 0 -;; [[RELAXED:%.*]] = [[Scope]] ; CHECK: [[ADD]] = OpFunction [[I32Ty]] ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_add(i32* %ptr, i32 %val) { @@ -40,7 +38,7 @@ define i32 @test_add(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_sub(i32* %ptr, i32 %val) { @@ -52,7 +50,7 @@ define i32 @test_sub(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_min(i32* %ptr, i32 %val) { @@ -64,7 +62,7 @@ define i32 @test_min(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_max(i32* %ptr, i32 %val) { @@ -76,7 +74,7 @@ define i32 @test_max(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umin(i32* %ptr, i32 %val) { @@ -88,7 +86,7 @@ define i32 @test_umin(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umax(i32* %ptr, i32 %val) { @@ -100,7 +98,7 @@ define i32 @test_umax(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_and(i32* %ptr, i32 %val) { @@ -112,7 +110,7 @@ define i32 @test_and(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_or(i32* %ptr, i32 %val) { @@ -124,7 +122,7 @@ define i32 @test_or(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter [[PtrI32Ty]] ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter [[I32Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[SCOPE]] [[B]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[CROSSDEVICESCOPE]] {{.+}} [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_xor(i32* %ptr, i32 %val) { @@ -136,10 +134,10 @@ define i32 @test_xor(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[Arg1:%.*]] = OpFunctionParameter [[PtrI64Ty]] ; CHECK-NEXT: [[Arg2:%.*]] = OpFunctionParameter [[I64Ty]] ; CHECK-NEXT: OpLabel -; CHECK-NEXT: OpAtomicSMin [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] [[SCOPE]] [[Arg2]] -; CHECK-NEXT: OpAtomicSMax [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] [[SCOPE]] [[Arg2]] -; CHECK-NEXT: OpAtomicUMin [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] [[SCOPE]] [[Arg2]] -; CHECK-NEXT: OpAtomicUMax [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] [[SCOPE]] [[Arg2]] +; CHECK-NEXT: OpAtomicSMin [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] {{.+}} [[Arg2]] +; CHECK-NEXT: OpAtomicSMax [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] {{.+}} [[Arg2]] +; CHECK-NEXT: OpAtomicUMin [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] {{.+}} [[Arg2]] +; CHECK-NEXT: OpAtomicUMax [[I64Ty]] [[Arg1]] [[DEVICESCOPE]] {{.+}} [[Arg2]] ; CHECK-NEXT: OpReturn ; CHECK-NEXT: OpFunctionEnd define dso_local spir_kernel void @test_wrappers(ptr addrspace(4) %arg, i64 %val) { From 92f739cb04f7b39f047c071d2e2c3d6f206dd231 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 24 Sep 2024 04:06:20 +0100 Subject: [PATCH 12/12] Add test for OCL defaulting to `device` scope. --- ...atomic-builtins-default-to-device-scope.cl | 235 ++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl diff --git a/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl b/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl new file mode 100644 index 0000000000000..5af2d807b4189 --- /dev/null +++ b/clang/test/CodeGenOpenCL/atomic-builtins-default-to-device-scope.cl @@ -0,0 +1,235 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O3 -o - -triple=amdgcn-amd-amdhsa \ +// RUN: | FileCheck %s --check-prefix=AMDGCN +// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O3 -o - -triple=spirv64-unknown-unknown \ +// RUN: | FileCheck %s --check-prefix=SPIRV + +// AMDGCN-LABEL: define dso_local i32 @load( +// AMDGCN-SAME: ptr nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = load atomic i32, ptr [[P]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @load( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef readonly [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = load atomic i32, ptr addrspace(4) [[P]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int load(int *p) { return __atomic_load_n(p, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local void @store( +// AMDGCN-SAME: ptr nocapture noundef writeonly [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: store atomic i32 [[X]], ptr [[P]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret void +// +// SPIRV-LABEL: define spir_func void @store( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef writeonly [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: store atomic i32 [[X]], ptr addrspace(4) [[P]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret void +// +void store(int *p, int x) { return __atomic_store_n(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @add( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw add ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @add( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw add ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int add(int *p, int x) { return __atomic_fetch_add(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local float @fadd( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret float [[TMP0]] +// +// SPIRV-LABEL: define spir_func float @fadd( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspace(4) [[P]], float [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret float [[TMP0]] +// +float fadd(float *p, float x) { return __atomic_fetch_add(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @sub( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw sub ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @sub( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw sub ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int sub(int *p, int x) { return __atomic_fetch_sub(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local float @fsub( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret float [[TMP0]] +// +// SPIRV-LABEL: define spir_func float @fsub( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspace(4) [[P]], float [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret float [[TMP0]] +// +float fsub(float *p, float x) { return __atomic_fetch_sub(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @and( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw and ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @and( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw and ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int and(int *p, int x) { return __atomic_fetch_and(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @nand( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw nand ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @nand( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw nand ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int nand(int *p, int x) { return __atomic_fetch_nand(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @or( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw or ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @or( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw or ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int or(int *p, int x) { return __atomic_fetch_or(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @xor( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw xor ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @xor( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw xor ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int xor(int *p, int x) { return __atomic_fetch_xor(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @min( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw min ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @min( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw min ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int min(int *p, int x) { return __atomic_fetch_min(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local float @fmin( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fmin ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret float [[TMP0]] +// +// SPIRV-LABEL: define spir_func float @fmin( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw fmin ptr addrspace(4) [[P]], float [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret float [[TMP0]] +// +float fmin(float *p, float x) { return __atomic_fetch_min(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @max( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw max ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @max( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw max ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int max(int *p, int x) { return __atomic_fetch_max(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local float @fmax( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw fmax ptr [[P]], float [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret float [[TMP0]] +// +// SPIRV-LABEL: define spir_func float @fmax( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw fmax ptr addrspace(4) [[P]], float [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret float [[TMP0]] +// +float fmax(float *p, float x) { return __atomic_fetch_max(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local i32 @xchg( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = atomicrmw xchg ptr [[P]], i32 [[X]] syncscope("agent") seq_cst, align 4 +// AMDGCN-NEXT: ret i32 [[TMP0]] +// +// SPIRV-LABEL: define spir_func i32 @xchg( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = atomicrmw xchg ptr addrspace(4) [[P]], i32 [[X]] syncscope("device") seq_cst, align 4 +// SPIRV-NEXT: ret i32 [[TMP0]] +// +int xchg(int *p, int x) { return __atomic_exchange_n(p, x, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local range(i32 0, 2) i32 @cmpxchg( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = cmpxchg ptr [[P]], i32 [[X]], i32 [[Y]] syncscope("agent") seq_cst seq_cst, align 4 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +// AMDGCN-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// AMDGCN-NEXT: ret i32 [[CONV]] +// +// SPIRV-LABEL: define spir_func range(i32 0, 2) i32 @cmpxchg( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = cmpxchg ptr addrspace(4) [[P]], i32 [[X]], i32 [[Y]] syncscope("device") seq_cst seq_cst, align 4 +// SPIRV-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +// SPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +int cmpxchg(int *p, int x, int y) { return __atomic_compare_exchange(p, &x, &y, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } +// AMDGCN-LABEL: define dso_local range(i32 0, 2) i32 @cmpxchg_weak( +// AMDGCN-SAME: ptr nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGCN-NEXT: [[ENTRY:.*:]] +// AMDGCN-NEXT: [[TMP0:%.*]] = cmpxchg weak ptr [[P]], i32 [[X]], i32 [[Y]] syncscope("agent") seq_cst seq_cst, align 4 +// AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +// AMDGCN-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// AMDGCN-NEXT: ret i32 [[CONV]] +// +// SPIRV-LABEL: define spir_func range(i32 0, 2) i32 @cmpxchg_weak( +// SPIRV-SAME: ptr addrspace(4) nocapture noundef [[P:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SPIRV-NEXT: [[ENTRY:.*:]] +// SPIRV-NEXT: [[TMP0:%.*]] = cmpxchg weak ptr addrspace(4) [[P]], i32 [[X]], i32 [[Y]] syncscope("device") seq_cst seq_cst, align 4 +// SPIRV-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1 +// SPIRV-NEXT: [[CONV:%.*]] = zext i1 [[TMP1]] to i32 +// SPIRV-NEXT: ret i32 [[CONV]] +// +int cmpxchg_weak(int *p, int x, int y) { return __atomic_compare_exchange(p, &x, &y, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); }