From 9f3cac44dde7d0adcf6cd090c0b91f57cb1c4dca Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 2 Oct 2024 11:18:36 +0100 Subject: [PATCH 01/14] Enable `InferAddressSpaces` for SPIR-V. --- .../amdgpu-kernel-arg-pointer-type.cu | 62 ++--- llvm/lib/Target/SPIRV/CMakeLists.txt | 2 + llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 92 +++++++ llvm/lib/Target/SPIRV/SPIRVTargetMachine.h | 7 + .../Target/SPIRV/SPIRVTargetTransformInfo.h | 4 + .../SPIRV/assumed-addrspace.ll | 31 +++ .../InferAddressSpaces/SPIRV/basic.ll | 236 ++++++++++++++++++ .../SPIRV/infer-address-space.ll | 211 ++++++++++++++++ .../SPIRV/infer-addrspacecast.ll | 65 +++++ .../SPIRV/infer-getelementptr.ll | 108 ++++++++ .../SPIRV/insert-pos-assert.ll | 158 ++++++++++++ .../InferAddressSpaces/SPIRV/is.constant.ll | 57 +++++ .../InferAddressSpaces/SPIRV/lit.local.cfg | 2 + .../SPIRV/mem-intrinsics.ll | 145 +++++++++++ .../SPIRV/multiple-uses-of-val.ll | 70 ++++++ .../InferAddressSpaces/SPIRV/prefetch.ll | 60 +++++ .../preserving-debugloc-addrspacecast.ll | 48 ++++ .../SPIRV/redundant-addrspacecast.ll | 28 +++ .../InferAddressSpaces/SPIRV/self-phi.ll | 29 +++ .../InferAddressSpaces/SPIRV/volatile.ll | 187 ++++++++++++++ 20 files changed, 1567 insertions(+), 35 deletions(-) create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-address-space.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/lit.local.cfg create mode 100644 
llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu index b295bbbdaaf95..15c8b46d278ea 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -58,13 +58,11 @@ // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi( -// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] { +// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel1Pi( @@ -126,13 +124,11 @@ __global__ void kernel1(int *x) { // 
OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri( -// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to i64 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel2Ri( @@ -195,7 +191,7 @@ __global__ void kernel2(int &x) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i( -// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] { +// OPT-SPIRV-SAME: ptr addrspace(2) nocapture noundef readonly [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4 // OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4 @@ -261,7 +257,7 @@ __global__ void kernel3(__attribute__((address_space(2))) int *x, // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi( -// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef 
[[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] { +// OPT-SPIRV-SAME: ptr addrspace(4) nocapture noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 @@ -343,7 +339,7 @@ struct S { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S( -// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1 @@ -446,19 +442,17 @@ __global__ void kernel4(struct S s) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S( -// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: ptr addrspace(1) nocapture noundef readonly [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[S_COERCE]] to i64 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[TMP1]], align 8 -// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP2]], align 4 -// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TMP1]], i64 8 -// OPT-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8 -// OPT-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[TMP4]], align 4 
-// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP5]], 1.000000e+00 -// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP4]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[S_COERCE]], align 8 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4 +// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[S_COERCE]], i64 8 +// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[Y]], align 8 +// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4 +// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 +// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel5P1S( @@ -551,7 +545,7 @@ struct T { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T( -// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0 // OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0 @@ -631,13 +625,11 @@ __global__ void kernel6(struct T t) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi( -// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: ptr addrspace(1) noalias nocapture noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[X_COERCE]] to 
i64 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP1]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(1) [[X_COERCE]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel7Pi( @@ -700,7 +692,7 @@ struct SS { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS( -// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] { +// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4 diff --git a/llvm/lib/Target/SPIRV/CMakeLists.txt b/llvm/lib/Target/SPIRV/CMakeLists.txt index 326343ae27814..0ae292498e463 100644 --- a/llvm/lib/Target/SPIRV/CMakeLists.txt +++ b/llvm/lib/Target/SPIRV/CMakeLists.txt @@ -52,6 +52,8 @@ add_llvm_target(SPIRVCodeGen Core Demangle GlobalISel + Passes + Scalar SPIRVAnalysis MC SPIRVDesc diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index e5384b2eb2c2c..91bcd68813fc5 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -26,9 +26,15 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/InitializePasses.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" +#include 
"llvm/Passes/OptimizationLevel.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/InferAddressSpaces.h" #include "llvm/Transforms/Utils.h" #include @@ -91,6 +97,89 @@ SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT, setRequiresStructuredCFG(false); } +namespace { + enum AddressSpace { + Function = storageClassToAddressSpace(SPIRV::StorageClass::Function), + CrossWorkgroup = + storageClassToAddressSpace(SPIRV::StorageClass::CrossWorkgroup), + UniformConstant = + storageClassToAddressSpace(SPIRV::StorageClass::UniformConstant), + Workgroup = storageClassToAddressSpace(SPIRV::StorageClass::Workgroup), + Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic) + }; +} + +unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { + const auto *LD = dyn_cast(V); + if (!LD) + return UINT32_MAX; + + // It must be a load from a pointer to Generic. + assert(V->getType()->isPointerTy() && + V->getType()->getPointerAddressSpace() == AddressSpace::Generic); + + const auto *Ptr = LD->getPointerOperand(); + if (Ptr->getType()->getPointerAddressSpace() != AddressSpace::UniformConstant) + return UINT32_MAX; + // For a loaded from a pointer to UniformConstant, we can infer CrossWorkgroup + // storage, as this could only have been legally initialised with a + // CrossWorkgroup (aka device) constant pointer. 
+ return AddressSpace::CrossWorkgroup; +} + +std::pair<const Value *, unsigned> +SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const { + using namespace PatternMatch; + + if (auto *II = dyn_cast<IntrinsicInst>(V)) { + switch (II->getIntrinsicID()) { + case Intrinsic::amdgcn_is_shared: + return std::pair(II->getArgOperand(0), AddressSpace::Workgroup); + case Intrinsic::amdgcn_is_private: + return std::pair(II->getArgOperand(0), AddressSpace::Function); + default: + break; + } + return std::pair(nullptr, UINT32_MAX); + } + // Check the global pointer predication based on + // (!is_shared(p) && !is_private(p)). Note that logic 'and' is commutative and + // the order of 'is_shared' and 'is_private' is not significant. + Value *Ptr; + if (getTargetTriple().getVendor() == Triple::VendorType::AMD && + match( + const_cast<Value *>(V), + m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))), + m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(m_Deferred(Ptr)))))) + return std::pair(Ptr, AddressSpace::CrossWorkgroup); + + return std::pair(nullptr, UINT32_MAX); +} + +bool SPIRVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + if (SrcAS != AddressSpace::Generic && SrcAS != AddressSpace::CrossWorkgroup) + return false; + return DestAS == AddressSpace::Generic || + DestAS == AddressSpace::CrossWorkgroup; +} + +void SPIRVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { + PB.registerCGSCCOptimizerLateEPCallback([](CGSCCPassManager &PM, + OptimizationLevel Level) { + if (Level == OptimizationLevel::O0) + return; + + FunctionPassManager FPM; + + // Add infer address spaces pass to the opt pipeline after inlining + // but before SROA to increase SROA opportunities. + FPM.addPass(InferAddressSpacesPass(AddressSpace::Generic)); + + PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); + }); +} + +namespace { // SPIR-V Code Generator Pass Configuration Options.
class SPIRVPassConfig : public TargetPassConfig { @@ -178,6 +267,9 @@ void SPIRVPassConfig::addIRPasses() { addPass(createSPIRVStructurizerPass()); } + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(createInferAddressSpacesPass(AddressSpace::Generic)); + addPass(createSPIRVRegularizerPass()); addPass(createSPIRVPrepareFunctionsPass(TM)); addPass(createSPIRVStripConvergenceIntrinsicsPass()); diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h index a1a9f26846153..24b09febb9d18 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h @@ -43,6 +43,13 @@ class SPIRVTargetMachine : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + unsigned getAssumedAddrSpace(const Value *V) const override; + std::pair<const Value *, unsigned> + getPredicatedAddrSpace(const Value *V) const override; + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) const override; + + void registerPassBuilderCallbacks(PassBuilder &PB) override; }; } // namespace llvm diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h index 24047f31fab29..295c0ceeade83 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h +++ b/llvm/lib/Target/SPIRV/SPIRVTargetTransformInfo.h @@ -39,6 +39,10 @@ class SPIRVTTIImpl : public BasicTTIImplBase<SPIRVTTIImpl> { : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + unsigned getFlatAddressSpace() const { + return storageClassToAddressSpace(SPIRV::StorageClass::Generic); + } + TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) { // SPIR-V natively supports OpBitcount, per 3.53.14 in the spec, as such it // is reasonable to assume the Op is fast / preferable to the expanded loop.
diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll new file mode 100644 index 0000000000000..9b65ff44f288f --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces -o - %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces -o - %s | FileCheck %s + +@c0 = addrspace(2) global ptr undef + +; CHECK-LABEL: @generic_ptr_from_constant +; CHECK: addrspacecast ptr addrspace(4) %p to ptr addrspace(1) +; CHECK-NEXT: load float, ptr addrspace(1) +define spir_func float @generic_ptr_from_constant() { + %p = load ptr addrspace(4), ptr addrspace(2) @c0 + %v = load float, ptr addrspace(4) %p + ret float %v +} + +%struct.S = type { ptr addrspace(4), ptr addrspace(4) } + +; CHECK-LABEL: @generic_ptr_from_aggregate_argument +; CHECK: addrspacecast ptr addrspace(4) %p0 to ptr addrspace(1) +; CHECK: addrspacecast ptr addrspace(4) %p1 to ptr addrspace(1) +; CHECK: load i32, ptr addrspace(1) +; CHECK: store float %v1, ptr addrspace(1) +; CHECK: ret +define spir_kernel void @generic_ptr_from_aggregate_argument(ptr addrspace(2) byval(%struct.S) align 8 %0) { + %p0 = load ptr addrspace(4), ptr addrspace(2) %0 + %f1 = getelementptr inbounds %struct.S, ptr addrspace(2) %0, i64 0, i32 1 + %p1 = load ptr addrspace(4), ptr addrspace(2) %f1 + %v0 = load i32, ptr addrspace(4) %p0 + %v1 = sitofp i32 %v0 to float + store float %v1, ptr addrspace(4) %p1 + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll new file mode 100644 index 0000000000000..75b23aa30349a --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll @@ -0,0 +1,236 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- 
-passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +; Trivial optimization of generic addressing + +define float @load_global_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define float @load_global_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[TMP0]], align 4 +; CHECK-NEXT: ret float [[TMP1]] +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(1) + %tmp1 = load float, ptr addrspace(1) %tmp0 + ret float %tmp1 +} + +define float @load_group_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define float @load_group_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(3) [[TMP0]], align 4 +; CHECK-NEXT: ret float [[TMP1]] +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(3) + %tmp1 = load float, ptr addrspace(3) %tmp0 + ret float %tmp1 +} + +define float @load_private_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define float @load_private_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4 +; CHECK-NEXT: ret float [[TMP1]] +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr + %tmp1 = load float, ptr %tmp0 + ret float %tmp1 +} + +define spir_kernel void @store_global_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define spir_kernel void @store_global_from_flat( +; CHECK-SAME: ptr addrspace(4) 
[[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[TMP0]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(1) + store float 0.0, ptr addrspace(1) %tmp0 + ret void +} + +define spir_kernel void @store_group_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define spir_kernel void @store_group_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3) +; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(3) [[TMP0]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(3) + store float 0.0, ptr addrspace(3) %tmp0 + ret void +} + +define spir_kernel void @store_private_from_flat(ptr addrspace(4) %generic_scalar) #0 { +; CHECK-LABEL: define spir_kernel void @store_private_from_flat( +; CHECK-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr +; CHECK-NEXT: store float 0.000000e+00, ptr [[TMP0]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr + store float 0.0, ptr %tmp0 + ret void +} + +define spir_kernel void @load_store_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @load_store_global( +; CHECK-SAME: ptr addrspace(1) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[INPUT]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) + %tmp1 = 
addrspacecast ptr addrspace(1) %output to ptr addrspace(4) + %val = load i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @load_store_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @load_store_group( +; CHECK-SAME: ptr addrspace(3) nocapture [[INPUT:%.*]], ptr addrspace(3) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(3) [[INPUT]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(3) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(3) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(3) %output to ptr addrspace(4) + %val = load i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @load_store_private(ptr nocapture %input, ptr nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @load_store_private( +; CHECK-SAME: ptr nocapture [[INPUT:%.*]], ptr nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[INPUT]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr %output to ptr addrspace(4) + %val = load i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @load_store_flat(ptr addrspace(4) nocapture %input, ptr addrspace(4) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @load_store_flat( +; CHECK-SAME: ptr addrspace(4) nocapture [[INPUT:%.*]], ptr addrspace(4) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) [[INPUT]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(4) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %val = load i32, ptr 
addrspace(4) %input, align 4 + store i32 %val, ptr addrspace(4) %output, align 4 + ret void +} + +define spir_kernel void @store_addrspacecast_ptr_value(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @store_addrspacecast_ptr_value( +; CHECK-SAME: ptr addrspace(1) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[INPUT]] to ptr addrspace(4) +; CHECK-NEXT: store ptr addrspace(4) [[CAST]], ptr addrspace(1) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) + store ptr addrspace(4) %cast, ptr addrspace(1) %output, align 4 + ret void +} + +define i32 @atomicrmw_add_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 { +; CHECK-LABEL: define i32 @atomicrmw_add_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(1) [[GLOBAL_PTR]], i32 [[Y]] seq_cst, align 4 +; CHECK-NEXT: ret i32 [[RET]] +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + %ret = atomicrmw add ptr addrspace(4) %cast, i32 %y seq_cst + ret i32 %ret +} + +define i32 @atomicrmw_add_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 { +; CHECK-LABEL: define i32 @atomicrmw_add_group_to_flat( +; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(3) [[GROUP_PTR]], i32 [[Y]] seq_cst, align 4 +; CHECK-NEXT: ret i32 [[RET]] +; + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + %ret = atomicrmw add ptr addrspace(4) %cast, i32 %y seq_cst + ret i32 %ret +} + +define { i32, i1 } @cmpxchg_global_to_flat(ptr addrspace(1) %global.ptr, i32 %cmp, i32 %val) #0 { +; CHECK-LABEL: define { i32, i1 } @cmpxchg_global_to_flat( +; CHECK-SAME: ptr addrspace(1) 
[[GLOBAL_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr addrspace(1) [[GLOBAL_PTR]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 +; CHECK-NEXT: ret { i32, i1 } [[RET]] +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + %ret = cmpxchg ptr addrspace(4) %cast, i32 %cmp, i32 %val seq_cst monotonic + ret { i32, i1 } %ret +} + +define { i32, i1 } @cmpxchg_group_to_flat(ptr addrspace(3) %group.ptr, i32 %cmp, i32 %val) #0 { +; CHECK-LABEL: define { i32, i1 } @cmpxchg_group_to_flat( +; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr addrspace(3) [[GROUP_PTR]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 +; CHECK-NEXT: ret { i32, i1 } [[RET]] +; + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + %ret = cmpxchg ptr addrspace(4) %cast, i32 %cmp, i32 %val seq_cst monotonic + ret { i32, i1 } %ret +} + +define { ptr addrspace(4), i1 } @cmpxchg_group_to_flat_wrong_operand(ptr addrspace(3) %cas.ptr, ptr addrspace(3) %cmp.ptr, ptr addrspace(4) %val) #0 { + %cast.cmp = addrspacecast ptr addrspace(3) %cmp.ptr to ptr addrspace(4) + %ret = cmpxchg ptr addrspace(3) %cas.ptr, ptr addrspace(4) %cast.cmp, ptr addrspace(4) %val seq_cst monotonic + ret { ptr addrspace(4), i1 } %ret +} + +define void @local_nullptr(ptr addrspace(1) nocapture %results, ptr addrspace(3) %a) { +; CHECK-LABEL: define void @local_nullptr( +; CHECK-SAME: ptr addrspace(1) nocapture [[RESULTS:%.*]], ptr addrspace(3) [[A:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne ptr addrspace(3) [[A]], addrspacecast (ptr null to ptr addrspace(3)) +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32 +; CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[RESULTS]], align 4 +; CHECK-NEXT: ret void +; +entry: + %tobool = icmp ne ptr addrspace(3) %a, addrspacecast (ptr null to ptr addrspace(3)) + 
%conv = zext i1 %tobool to i32 + store i32 %conv, ptr addrspace(1) %results, align 4 + ret void +} + +define i32 @atomicrmw_add_global_to_flat_preserve_amdgpu_md(ptr addrspace(1) %global.ptr, i32 %y) #0 { +; CHECK-LABEL: define i32 @atomicrmw_add_global_to_flat_preserve_amdgpu_md( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(1) [[GLOBAL_PTR]], i32 [[Y]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]] +; CHECK-NEXT: ret i32 [[RET]] +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + %ret = atomicrmw add ptr addrspace(4) %cast, i32 %y seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 + ret i32 %ret +} + +define ptr addrspace(4) @try_infer_getelementptr_constant_null() { +; CHECK-LABEL: define ptr addrspace(4) @try_infer_getelementptr_constant_null() { +; CHECK-NEXT: [[CE:%.*]] = getelementptr i8, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) null, i64 8), i64 0 +; CHECK-NEXT: ret ptr addrspace(4) [[CE]] +; + %ce = getelementptr i8, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) null, i64 8), i64 0 + ret ptr addrspace(4) %ce +} + +attributes #0 = { nounwind } + +!0 = !{} +;. +; CHECK: [[META0]] = !{} +;. 
diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-address-space.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-address-space.ll new file mode 100644 index 0000000000000..7de9557a9ee90 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-address-space.ll @@ -0,0 +1,211 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv64-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s +; Ports of most of test/CodeGen/NVPTX/access-non-generic.ll + +@scalar = internal addrspace(3) global float 0.0, align 4 +@array = internal addrspace(3) global [10 x float] zeroinitializer, align 4 + +define spir_kernel void @load_store_lds_f32(i32 %i, float %v) #0 { +; CHECK-LABEL: define spir_kernel void @load_store_lds_f32( +; CHECK-SAME: i32 [[I:%.*]], float [[V:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[TMP:%.*]] = load float, ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[TMP]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: call addrspace(4) void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[TMP2]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: call addrspace(4) void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[TMP3]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) getelementptr inbounds ([10 x float], ptr addrspace(3) @array, i32 0, i32 5), align 4 +; CHECK-NEXT: call addrspace(4) void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 5 +; CHECK-NEXT: 
[[TMP5:%.*]] = load float, ptr addrspace(3) [[TMP4]], align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[TMP5]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) [[TMP4]], align 4 +; CHECK-NEXT: call addrspace(4) void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [10 x float], ptr addrspace(3) @array, i32 0, i32 [[I]] +; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr addrspace(3) [[TMP7]], align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[TMP8]]) +; CHECK-NEXT: store float [[V]], ptr addrspace(3) [[TMP7]], align 4 +; CHECK-NEXT: call addrspace(4) void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: ret void +; +bb: + %tmp = load float, ptr addrspace(4) addrspacecast (ptr addrspace(3) @scalar to ptr addrspace(4)), align 4 + call void @use(float %tmp) + store float %v, ptr addrspace(4) addrspacecast (ptr addrspace(3) @scalar to ptr addrspace(4)), align 4 + call void @llvm.amdgcn.s.barrier() + %tmp1 = addrspacecast ptr addrspace(3) @scalar to ptr addrspace(4) + %tmp2 = load float, ptr addrspace(4) %tmp1, align 4 + call void @use(float %tmp2) + store float %v, ptr addrspace(4) %tmp1, align 4 + call void @llvm.amdgcn.s.barrier() + %tmp3 = load float, ptr addrspace(4) getelementptr inbounds ([10 x float], ptr addrspace(4) addrspacecast (ptr addrspace(3) @array to ptr addrspace(4)), i32 0, i32 5), align 4 + call void @use(float %tmp3) + store float %v, ptr addrspace(4) getelementptr inbounds ([10 x float], ptr addrspace(4) addrspacecast (ptr addrspace(3) @array to ptr addrspace(4)), i32 0, i32 5), align 4 + call void @llvm.amdgcn.s.barrier() + %tmp4 = getelementptr inbounds [10 x float], ptr addrspace(4) addrspacecast (ptr addrspace(3) @array to ptr addrspace(4)), i32 0, i32 5 + %tmp5 = load float, ptr addrspace(4) %tmp4, align 4 + call void @use(float %tmp5) + store float %v, ptr addrspace(4) %tmp4, align 4 + call void @llvm.amdgcn.s.barrier() + %tmp6 = addrspacecast ptr addrspace(3) @array to ptr addrspace(4) + %tmp7 = 
getelementptr inbounds [10 x float], ptr addrspace(4) %tmp6, i32 0, i32 %i + %tmp8 = load float, ptr addrspace(4) %tmp7, align 4 + call void @use(float %tmp8) + store float %v, ptr addrspace(4) %tmp7, align 4 + call void @llvm.amdgcn.s.barrier() + ret void +} + +define i32 @constexpr_load_int_from_float_lds() #0 { +; CHECK-LABEL: define i32 @constexpr_load_int_from_float_lds( +; CHECK-SAME: ) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(3) @scalar, align 4 +; CHECK-NEXT: ret i32 [[TMP]] +; +bb: + %tmp = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @scalar to ptr addrspace(4)), align 4 + ret i32 %tmp +} + +define i32 @load_int_from_global_float(ptr addrspace(1) %input, i32 %i, i32 %j) #0 { +; CHECK-LABEL: define i32 @load_int_from_global_float( +; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i32 [[I]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr addrspace(1) [[TMP1]], i32 [[J]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: ret i32 [[TMP4]] +; +bb: + %tmp = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) + %tmp1 = getelementptr float, ptr addrspace(4) %tmp, i32 %i + %tmp2 = getelementptr float, ptr addrspace(4) %tmp1, i32 %j + %tmp4 = load i32, ptr addrspace(4) %tmp2 + ret i32 %tmp4 +} + +define spir_kernel void @nested_const_expr() #0 { +; CHECK-LABEL: define spir_kernel void @nested_const_expr( +; CHECK-SAME: ) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: store i32 1, ptr addrspace(3) getelementptr ([10 x float], ptr addrspace(3) @array, i64 0, i64 1), align 4 +; CHECK-NEXT: ret void +; + store i32 1, ptr addrspace(4) bitcast (ptr addrspace(4) getelementptr ([10 x float], ptr addrspace(4) addrspacecast (ptr addrspace(3) @array to ptr addrspace(4)), i64 0, i64 1) to ptr 
addrspace(4)), align 4 + + ret void +} + +define spir_kernel void @rauw(ptr addrspace(1) %input) #0 { +; CHECK-LABEL: define spir_kernel void @rauw( +; CHECK-SAME: ptr addrspace(1) [[INPUT:%.*]]) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr float, ptr addrspace(1) [[INPUT]], i64 10 +; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(1) [[ADDR]], align 4 +; CHECK-NEXT: store float [[V]], ptr addrspace(1) [[ADDR]], align 4 +; CHECK-NEXT: ret void +; +bb: + %generic_input = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) + %addr = getelementptr float, ptr addrspace(4) %generic_input, i64 10 + %v = load float, ptr addrspace(4) %addr + store float %v, ptr addrspace(4) %addr + ret void +} + +; FIXME: Should be able to eliminate the cast inside the loop +define spir_kernel void @loop() #0 { +; CHECK-LABEL: define spir_kernel void @loop( +; CHECK-SAME: ) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[END:%.*]] = getelementptr float, ptr addrspace(3) @array, i64 10 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[V]]) +; CHECK-NEXT: [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1 +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq ptr addrspace(3) [[I2]], [[END]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %p = addrspacecast ptr addrspace(3) @array to ptr addrspace(4) + %end = getelementptr float, ptr addrspace(4) %p, i64 10 + br label %loop + +loop: ; preds = %loop, %entry + %i = phi ptr addrspace(4) [ %p, %entry ], [ %i2, %loop ] + %v = load float, ptr addrspace(4) %i + call void @use(float %v) + %i2 = getelementptr float, ptr addrspace(4) %i, i64 1 + %exit_cond = icmp eq ptr 
addrspace(4) %i2, %end + br i1 %exit_cond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +@generic_end = external addrspace(1) global ptr addrspace(4) + +define spir_kernel void @loop_with_generic_bound() #0 { +; CHECK-LABEL: define spir_kernel void @loop_with_generic_bound( +; CHECK-SAME: ) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[END:%.*]] = load ptr addrspace(4), ptr addrspace(1) @generic_end, align 8 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[I:%.*]] = phi ptr addrspace(3) [ @array, %[[ENTRY]] ], [ [[I2:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(3) [[I]], align 4 +; CHECK-NEXT: call addrspace(4) void @use(float [[V]]) +; CHECK-NEXT: [[I2]] = getelementptr float, ptr addrspace(3) [[I]], i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[I2]] to ptr addrspace(4) +; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq ptr addrspace(4) [[TMP0]], [[END]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %p = addrspacecast ptr addrspace(3) @array to ptr addrspace(4) + %end = load ptr addrspace(4), ptr addrspace(1) @generic_end + br label %loop + +loop: ; preds = %loop, %entry + %i = phi ptr addrspace(4) [ %p, %entry ], [ %i2, %loop ] + %v = load float, ptr addrspace(4) %i + call void @use(float %v) + %i2 = getelementptr float, ptr addrspace(4) %i, i64 1 + %exit_cond = icmp eq ptr addrspace(4) %i2, %end + br i1 %exit_cond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +define void @select_bug() #0 { +; CHECK-LABEL: define void @select_bug( +; CHECK-SAME: ) addrspace(4) #[[ATTR0]] { +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr addrspace(4) inttoptr (i64 4873 to ptr addrspace(4)), null +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 73, i64 93 +; CHECK-NEXT: [[ADD_PTR157:%.*]] = getelementptr inbounds i64, ptr addrspace(4) undef, i64 [[SEL]] +; CHECK-NEXT: 
[[CMP169:%.*]] = icmp uge ptr addrspace(4) undef, [[ADD_PTR157]] +; CHECK-NEXT: unreachable +; + %cmp = icmp ne ptr addrspace(4) inttoptr (i64 4873 to ptr addrspace(4)), null + %sel = select i1 %cmp, i64 73, i64 93 + %add.ptr157 = getelementptr inbounds i64, ptr addrspace(4) undef, i64 %sel + %cmp169 = icmp uge ptr addrspace(4) undef, %add.ptr157 + unreachable +} + +declare void @llvm.amdgcn.s.barrier() #1 +declare void @use(float) #0 + +attributes #0 = { nounwind } +attributes #1 = { convergent nounwind } diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll new file mode 100644 index 0000000000000..4e64ec7174017 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +; Test that pure addrspacecast instructions not directly connected to +; a memory operation are inferred. 
+ +define void @addrspacecast_gep_addrspacecast(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @addrspacecast_gep_addrspacecast( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(3) [[PTR]], i64 9 +; CHECK-NEXT: store i32 8, ptr addrspace(3) [[GEP0]], align 8 +; CHECK-NEXT: ret void +; + %asc0 = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) + %gep0 = getelementptr i32, ptr addrspace(4) %asc0, i64 9 + %asc1 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store i32 8, ptr addrspace(3) %asc1, align 8 + ret void +} + +define void @addrspacecast_different_pointee_type(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @addrspacecast_different_pointee_type( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(3) [[PTR]], i64 9 +; CHECK-NEXT: store i8 8, ptr addrspace(3) [[GEP0]], align 8 +; CHECK-NEXT: ret void +; + %asc0 = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) + %gep0 = getelementptr i32, ptr addrspace(4) %asc0, i64 9 + %asc1 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store i8 8, ptr addrspace(3) %asc1, align 8 + ret void +} + +define void @addrspacecast_to_memory(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void @addrspacecast_to_memory( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(3) [[PTR]], i64 9 +; CHECK-NEXT: store volatile ptr addrspace(3) [[GEP0]], ptr addrspace(1) undef, align +; CHECK-NEXT: ret void +; + %asc0 = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) + %gep0 = getelementptr i32, ptr addrspace(4) %asc0, i64 9 + %asc1 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store volatile ptr addrspace(3) %asc1, ptr addrspace(1) undef + ret void +} + +define void @multiuse_addrspacecast_gep_addrspacecast(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define void 
@multiuse_addrspacecast_gep_addrspacecast( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[ASC0:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4) +; CHECK-NEXT: store volatile ptr addrspace(4) [[ASC0]], ptr addrspace(1) undef, align +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(3) [[PTR]], i64 9 +; CHECK-NEXT: store i32 8, ptr addrspace(3) [[GEP0]], align 8 +; CHECK-NEXT: ret void +; + %asc0 = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) + store volatile ptr addrspace(4) %asc0, ptr addrspace(1) undef + %gep0 = getelementptr i32, ptr addrspace(4) %asc0, i64 9 + %asc1 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store i32 8, ptr addrspace(3) %asc1, align 8 + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll new file mode 100644 index 0000000000000..56412e50ed5d2 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +; Test that pure GetElementPtr instructions not directly connected to +; a memory operation are inferred. 
+ +@lds = internal unnamed_addr addrspace(3) global [648 x double] undef, align 8 + +define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { +; CHECK-LABEL: @simplified_constexpr_gep_addrspacecast( +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr inbounds ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX0:%.*]] +; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP0]], align 8 +; CHECK-NEXT: ret void +; + %gep0 = getelementptr inbounds double, ptr addrspace(4) addrspacecast (ptr addrspace(3) getelementptr inbounds ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384) to ptr addrspace(4)), i64 %idx0 + %asc = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store double 1.000000e+00, ptr addrspace(3) %asc, align 8 + ret void +} + +define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { +; CHECK-LABEL: @constexpr_gep_addrspacecast( +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX0:%.*]] +; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP0]], align 8 +; CHECK-NEXT: ret void +; + %gep0 = getelementptr inbounds double, ptr addrspace(4) getelementptr ([648 x double], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i64 0, i64 384), i64 %idx0 + %asc = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store double 1.0, ptr addrspace(3) %asc, align 8 + ret void +} + +define void @constexpr_gep_gep_addrspacecast(i64 %idx0, i64 %idx1) { +; CHECK-LABEL: @constexpr_gep_gep_addrspacecast( +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX0:%.*]] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds double, ptr addrspace(3) [[GEP0]], i64 [[IDX1:%.*]] +; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) 
[[GEP1]], align 8 +; CHECK-NEXT: ret void +; + %gep0 = getelementptr inbounds double, ptr addrspace(4) getelementptr ([648 x double], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i64 0, i64 384), i64 %idx0 + %gep1 = getelementptr inbounds double, ptr addrspace(4) %gep0, i64 %idx1 + %asc = addrspacecast ptr addrspace(4) %gep1 to ptr addrspace(3) + store double 1.0, ptr addrspace(3) %asc, align 8 + ret void +} + +; Don't crash +define spir_kernel void @vector_gep(<4 x ptr addrspace(3)> %array) nounwind { +; CHECK-LABEL: @vector_gep( +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast <4 x ptr addrspace(3)> [[ARRAY:%.*]] to <4 x ptr addrspace(4)> +; CHECK-NEXT: [[P:%.*]] = getelementptr [1024 x i32], <4 x ptr addrspace(4)> [[CAST]], <4 x i16> zeroinitializer, <4 x i16> zeroinitializer +; CHECK-NEXT: [[P0:%.*]] = extractelement <4 x ptr addrspace(4)> [[P]], i32 0 +; CHECK-NEXT: [[P1:%.*]] = extractelement <4 x ptr addrspace(4)> [[P]], i32 1 +; CHECK-NEXT: [[P2:%.*]] = extractelement <4 x ptr addrspace(4)> [[P]], i32 2 +; CHECK-NEXT: [[P3:%.*]] = extractelement <4 x ptr addrspace(4)> [[P]], i32 3 +; CHECK-NEXT: store i32 99, ptr addrspace(4) [[P0]], align 4 +; CHECK-NEXT: store i32 99, ptr addrspace(4) [[P1]], align 4 +; CHECK-NEXT: store i32 99, ptr addrspace(4) [[P2]], align 4 +; CHECK-NEXT: store i32 99, ptr addrspace(4) [[P3]], align 4 +; CHECK-NEXT: ret void +; + %cast = addrspacecast <4 x ptr addrspace(3)> %array to <4 x ptr addrspace(4)> + %p = getelementptr [1024 x i32], <4 x ptr addrspace(4)> %cast, <4 x i16> zeroinitializer, <4 x i16> zeroinitializer + %p0 = extractelement <4 x ptr addrspace(4)> %p, i32 0 + %p1 = extractelement <4 x ptr addrspace(4)> %p, i32 1 + %p2 = extractelement <4 x ptr addrspace(4)> %p, i32 2 + %p3 = extractelement <4 x ptr addrspace(4)> %p, i32 3 + store i32 99, ptr addrspace(4) %p0 + store i32 99, ptr addrspace(4) %p1 + store i32 99, ptr addrspace(4) %p2 + store i32 99, ptr addrspace(4) %p3 + ret void +} + +define void
@repeated_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { +; CHECK-LABEL: @repeated_constexpr_gep_addrspacecast( +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX0:%.*]] +; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP0]], align 8 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX1:%.*]] +; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP1]], align 8 +; CHECK-NEXT: ret void +; + %gep0 = getelementptr inbounds double, ptr addrspace(4) getelementptr ([648 x double], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i64 0, i64 384), i64 %idx0 + %asc0 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) + store double 1.0, ptr addrspace(3) %asc0, align 8 + + %gep1 = getelementptr inbounds double, ptr addrspace(4) getelementptr ([648 x double], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i64 0, i64 384), i64 %idx1 + %asc1 = addrspacecast ptr addrspace(4) %gep1 to ptr addrspace(3) + store double 1.0, ptr addrspace(3) %asc1, align 8 + + ret void +} + +define void @unorder_constexpr_gep_bitcast() { +; CHECK-LABEL: @unorder_constexpr_gep_bitcast( +; CHECK-NEXT: [[X0:%.*]] = load i32, ptr addrspace(3) @lds, align 4 +; CHECK-NEXT: [[X1:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @lds, i32 1), align 4 +; CHECK-NEXT: call void @use(i32 [[X0]], i32 [[X1]]) +; CHECK-NEXT: ret void +; + %x0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), align 4 + %x1 = load i32, ptr addrspace(4) getelementptr (i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i32 1), align 4 + call void @use(i32 %x0, i32 %x1) + ret void +} + +declare void @use(i32, i32) diff --git 
a/llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll new file mode 100644 index 0000000000000..f736579c1765f --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll @@ -0,0 +1,158 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s --check-prefix=SPIRV32 +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s --check-prefix=SPIRV64 + +; Addrspacecasts or bitcasts must be inserted after the instructions that define their uses. + +%struct.s0 = type { ptr addrspace(4), i32 } +%struct.s1 = type { %struct.s0 } + +@global0 = protected addrspace(2) externally_initialized global %struct.s1 zeroinitializer + +declare i32 @func(ptr %arg) + +define i32 @addrspacecast_insert_pos_assert() { +; CHECK-LABEL: @addrspacecast_insert_pos_assert( +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(2) @global0, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[LOAD0]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr addrspace(4) +; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ALLOCA]], align 4 +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD1]] to i64 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[SEXT]] +; CHECK-NEXT: [[CALL:%.*]] = call i32 @func(ptr addrspace(4) [[GEP]]) +; CHECK-NEXT: ret i32 [[CALL]] +; +; SPIRV32-LABEL: @addrspacecast_insert_pos_assert( +; SPIRV32-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4 +; SPIRV32-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(2) @global0, align 4 +; SPIRV32-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[LOAD0]] to ptr addrspace(1) +; SPIRV32-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr 
addrspace(4) +; SPIRV32-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ALLOCA]], align 4 +; SPIRV32-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD1]] to i64 +; SPIRV32-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[SEXT]] +; SPIRV32-NEXT: [[CALL:%.*]] = call i32 @func(ptr addrspace(4) [[GEP]]) +; SPIRV32-NEXT: ret i32 [[CALL]] +; +; SPIRV64-LABEL: @addrspacecast_insert_pos_assert( +; SPIRV64-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4 +; SPIRV64-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(2) @global0, align 8 +; SPIRV64-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[LOAD0]] to ptr addrspace(1) +; SPIRV64-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr addrspace(4) +; SPIRV64-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ALLOCA]], align 4 +; SPIRV64-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD1]] to i64 +; SPIRV64-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[SEXT]] +; SPIRV64-NEXT: [[CALL:%.*]] = call i32 @func(ptr addrspace(4) [[GEP]]) +; SPIRV64-NEXT: ret i32 [[CALL]] +; + %alloca = alloca i32, align 4 + %cast = addrspacecast ptr %alloca to ptr addrspace(4) + %load0 = load ptr addrspace(4), ptr addrspace(2) @global0 + %load1 = load i32, ptr addrspace(4) %cast + %sext = sext i32 %load1 to i64 + %gep = getelementptr inbounds i32, ptr addrspace(4) %load0, i64 %sext + %call = call i32 @func(ptr addrspace(4) %gep) + ret i32 %call +} + +define void @bitcast_insert_pos_assert_1() { +; CHECK-LABEL: @bitcast_insert_pos_assert_1( +; CHECK-NEXT: bb.0: +; CHECK-NEXT: [[ASC0:%.*]] = addrspacecast ptr undef to ptr addrspace(4) +; CHECK-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; CHECK-NEXT: br label [[BB_1:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: br i1 undef, label [[BB_2:%.*]], label [[BB_3:%.*]] +; CHECK: bb.2: +; CHECK-NEXT: [[PTI1:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; CHECK-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI1]] to ptr addrspace(4) +; CHECK-NEXT: [[LOAD0:%.*]] = 
load ptr addrspace(4), ptr addrspace(4) [[ITP0]], align 8 +; CHECK-NEXT: br label [[BB_3]] +; CHECK: bb.3: +; CHECK-NEXT: ret void +; +; SPIRV32-LABEL: @bitcast_insert_pos_assert_1( +; SPIRV32-NEXT: bb.0: +; SPIRV32-NEXT: [[ASC0:%.*]] = addrspacecast ptr undef to ptr addrspace(4) +; SPIRV32-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; SPIRV32-NEXT: br label [[BB_1:%.*]] +; SPIRV32: bb.1: +; SPIRV32-NEXT: br i1 undef, label [[BB_2:%.*]], label [[BB_3:%.*]] +; SPIRV32: bb.2: +; SPIRV32-NEXT: [[PTI1:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; SPIRV32-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI1]] to ptr addrspace(4) +; SPIRV32-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITP0]], align 8 +; SPIRV32-NEXT: br label [[BB_3]] +; SPIRV32: bb.3: +; SPIRV32-NEXT: ret void +; +; SPIRV64-LABEL: @bitcast_insert_pos_assert_1( +; SPIRV64-NEXT: bb.0: +; SPIRV64-NEXT: [[ASC0:%.*]] = addrspacecast ptr undef to ptr addrspace(4) +; SPIRV64-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; SPIRV64-NEXT: br label [[BB_1:%.*]] +; SPIRV64: bb.1: +; SPIRV64-NEXT: br i1 undef, label [[BB_2:%.*]], label [[BB_3:%.*]] +; SPIRV64: bb.2: +; SPIRV64-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr undef, align 8 +; SPIRV64-NEXT: br label [[BB_3]] +; SPIRV64: bb.3: +; SPIRV64-NEXT: ret void +; +bb.0: + %asc0 = addrspacecast ptr undef to ptr addrspace(4) + %pti0 = ptrtoint ptr addrspace(4) %asc0 to i64 + br label %bb.1 + +bb.1: + br i1 undef, label %bb.2, label %bb.3 + +bb.2: + %pti1 = ptrtoint ptr addrspace(4) %asc0 to i64 + %itp0 = inttoptr i64 %pti1 to ptr addrspace(4) + %load0 = load ptr addrspace(4), ptr addrspace(4) %itp0, align 8 + br label %bb.3 + +bb.3: + ret void +} + +define void @bitcast_insert_pos_assert_2() { +; CHECK-LABEL: @bitcast_insert_pos_assert_2( +; CHECK-NEXT: [[ALLOCA0:%.*]] = alloca [[STRUCT_S1:%.*]], align 16 +; CHECK-NEXT: [[ASC0:%.*]] = addrspacecast ptr [[ALLOCA0]] to ptr addrspace(4) +; CHECK-NEXT: 
[[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; CHECK-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI0]] to ptr addrspace(4) +; CHECK-NEXT: [[ITP1:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; CHECK-NEXT: [[ITP2:%.*]] = inttoptr i64 [[ITP1]] to ptr addrspace(4) +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i64, ptr addrspace(4) [[ITP2]], i64 1 +; CHECK-NEXT: ret void +; +; SPIRV32-LABEL: @bitcast_insert_pos_assert_2( +; SPIRV32-NEXT: [[ALLOCA0:%.*]] = alloca [[STRUCT_S1:%.*]], align 16 +; SPIRV32-NEXT: [[ASC0:%.*]] = addrspacecast ptr [[ALLOCA0]] to ptr addrspace(4) +; SPIRV32-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; SPIRV32-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI0]] to ptr addrspace(4) +; SPIRV32-NEXT: [[ITP1:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; SPIRV32-NEXT: [[ITP2:%.*]] = inttoptr i64 [[ITP1]] to ptr addrspace(4) +; SPIRV32-NEXT: [[GEP0:%.*]] = getelementptr i64, ptr addrspace(4) [[ITP2]], i64 1 +; SPIRV32-NEXT: ret void +; +; SPIRV64-LABEL: @bitcast_insert_pos_assert_2( +; SPIRV64-NEXT: [[ALLOCA0:%.*]] = alloca [[STRUCT_S1:%.*]], align 16 +; SPIRV64-NEXT: [[ASC0:%.*]] = addrspacecast ptr [[ALLOCA0]] to ptr addrspace(4) +; SPIRV64-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 +; SPIRV64-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI0]] to ptr addrspace(4) +; SPIRV64-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ALLOCA0]] to ptr addrspace(4) +; SPIRV64-NEXT: [[GEP0:%.*]] = getelementptr i64, ptr addrspace(4) [[TMP1]], i64 1 +; SPIRV64-NEXT: ret void +; + %alloca0 = alloca %struct.s1, align 16 + %asc0 = addrspacecast ptr %alloca0 to ptr addrspace(4) + %pti0 = ptrtoint ptr addrspace(4) %asc0 to i64 + %itp0 = inttoptr i64 %pti0 to ptr addrspace(4) + %itp1 = ptrtoint ptr addrspace(4) %asc0 to i64 + %itp2 = inttoptr i64 %itp1 to ptr addrspace(4) + %gep0 = getelementptr i64, ptr addrspace(4) %itp2, i64 1 + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll 
b/llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll new file mode 100644 index 0000000000000..d6a58d2fccde0 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +define i1 @is_constant_global_to_flat(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define i1 @is_constant_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.p1(ptr addrspace(1) [[PTR]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %cast = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(4) + %ret = call i1 @llvm.is.constant.p4(ptr addrspace(4) %cast) + ret i1 %ret +} + +define i1 @is_constant_local_to_flat(ptr addrspace(3) %ptr) { +; CHECK-LABEL: define i1 @is_constant_local_to_flat( +; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.p3(ptr addrspace(3) [[PTR]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %cast = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) + %ret = call i1 @llvm.is.constant.p4(ptr addrspace(4) %cast) + ret i1 %ret +} + +define i1 @is_constant_private_to_flat(ptr %ptr) { +; CHECK-LABEL: define i1 @is_constant_private_to_flat( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.p0(ptr [[PTR]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %cast = addrspacecast ptr %ptr to ptr addrspace(4) + %ret = call i1 @llvm.is.constant.p4(ptr addrspace(4) %cast) + ret i1 %ret +} + +define i1 @is_constant_private_to_flat_v2(<2 x ptr> %ptr) { +; CHECK-LABEL: define i1 @is_constant_private_to_flat_v2( +; CHECK-SAME: <2 x ptr> [[PTR:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.v2p0(<2 x ptr> [[PTR]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %cast = 
addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(4)> + %ret = call i1 @llvm.is.constant.v2p4(<2 x ptr addrspace(4)> %cast) + ret i1 %ret +} + +define i1 @is_constant_i32(i32 %val) { +; CHECK-LABEL: define i1 @is_constant_i32( +; CHECK-SAME: i32 [[VAL:%.*]]) { +; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.i32(i32 [[VAL]]) +; CHECK-NEXT: ret i1 [[RET]] +; + %ret = call i1 @llvm.is.constant.i32(i32 %val) + ret i1 %ret +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/lit.local.cfg b/llvm/test/Transforms/InferAddressSpaces/SPIRV/lit.local.cfg new file mode 100644 index 0000000000000..78dd74cd6dc63 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/lit.local.cfg @@ -0,0 +1,2 @@ +if not "SPIRV" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll new file mode 100644 index 0000000000000..fd60c307a35fc --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll @@ -0,0 +1,145 @@ +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +; CHECK-LABEL: @memset_group_to_flat( +; CHECK: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 %group.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 { + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memset_global_to_flat( +; CHECK: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 %global.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memset_global_to_flat(ptr addrspace(1) %global.ptr, 
i32 %y) #0 { + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memset_group_to_flat_no_md( +; CHECK: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 %group.ptr, i8 4, i64 %size, i1 false){{$}} +define spir_kernel void @memset_group_to_flat_no_md(ptr addrspace(3) %group.ptr, i64 %size) #0 { + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 %size, i1 false) + ret void +} + +; CHECK-LABEL: @memset_global_to_flat_no_md( +; CHECK: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 %global.ptr, i8 4, i64 %size, i1 false){{$}} +define spir_kernel void @memset_global_to_flat_no_md(ptr addrspace(1) %global.ptr, i64 %size) #0 { + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 %size, i1 false) + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group( +; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memcpy_inline_flat_to_flat_replace_src_with_group( +; CHECK: call void @llvm.memcpy.inline.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 42, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define 
spir_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.inline.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 42, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group( +; CHECK: call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) align 4 %dest.group.ptr, ptr addrspace(4) align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrspace(3) %dest.group.ptr, ptr addrspace(4) %src.ptr, i64 %size) #0 { + %cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %cast.dest, ptr addrspace(4) align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group( +; CHECK: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 %src.group.ptr, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(ptr addrspace(3) %dest.group.ptr, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + %cast.dest = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %cast.dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global( +; CHECK: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 %dest.group.ptr, ptr addrspace(1) align 4 %src.global.ptr, 
i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(ptr addrspace(3) %dest.group.ptr, ptr addrspace(1) %src.global.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(1) %src.global.ptr to ptr addrspace(4) + %cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %cast.dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global( +; CHECK: call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %dest.global.ptr, ptr addrspace(3) align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspace(1) %dest.global.ptr, ptr addrspace(3) %src.group.ptr, i32 %size) #0 { + %cast.dest = addrspacecast ptr addrspace(1) %dest.global.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p3.i32(ptr addrspace(4) align 4 %cast.dest, ptr addrspace(3) align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct( +; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !8 +define spir_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa.struct !8 + ret void +} + +; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md( +; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest, 
ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}} +define spir_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false) + ret void +} + +; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md( +; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest0, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}} +; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest1, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}} +define spir_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(ptr addrspace(4) %dest0, ptr addrspace(4) %dest1, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest0, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest1, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false) + ret void +} + +; Check for iterator problems if the pointer has 2 uses in the same call +; CHECK-LABEL: @memcpy_group_flat_to_flat_self( +; CHECK: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 %group.ptr, ptr addrspace(3) align 4 %group.ptr, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %group.ptr) #0 { + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %cast, ptr addrspace(4) align 4 %cast, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} +; CHECK-LABEL: 
@memmove_flat_to_flat_replace_src_with_group( +; CHECK: call void @llvm.memmove.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 +define spir_kernel void @memmove_flat_to_flat_replace_src_with_group(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) + call void @llvm.memmove.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 + ret void +} + +declare void @llvm.memset.p4.i64(ptr addrspace(4) nocapture writeonly, i8, i64, i1) #1 +declare void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1) #1 +declare void @llvm.memcpy.inline.p4.p4.i64(ptr addrspace(4) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1) #1 +declare void @llvm.memcpy.p4.p3.i32(ptr addrspace(4) nocapture writeonly, ptr addrspace(3) nocapture readonly, i32, i1) #1 +declare void @llvm.memmove.p4.p4.i64(ptr addrspace(4) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1) #1 + +attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind } + +!0 = !{!1, !1, i64 0} +!1 = !{!"A", !2} +!2 = !{!"tbaa root"} +!3 = !{!4} +!4 = distinct !{!4, !5, !"some scope 1"} +!5 = distinct !{!5, !"some domain"} +!6 = !{!7} +!7 = distinct !{!7, !5, !"some scope 2"} +!8 = !{i64 0, i64 8, null} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll new file mode 100644 index 0000000000000..83725d22df312 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=spirv32-- -S -passes=infer-address-spaces 
--verify-each %s | FileCheck %s +; RUN: opt -mtriple=spirv64-- -S -passes=infer-address-spaces --verify-each %s | FileCheck %s + +; Inst can use a value multiple time. When we're inserting an addrspacecast to flat, +; it's important all the identical uses use an indentical replacement, especially +; for PHIs. + +define spir_kernel void @test_phi() { +; CHECK-LABEL: @test_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOADED_PTR:%.*]] = load ptr addrspace(4), ptr addrspace(2) null, align 8 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[LOADED_PTR]] to ptr addrspace(1) +; CHECK-NEXT: br label [[BB0:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr addrspace(1) [[TMP0]], i64 3 +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[GEP]] to ptr addrspace(4) +; CHECK-NEXT: switch i32 0, label [[END:%.*]] [ +; CHECK-NEXT: i32 1, label [[END]] +; CHECK-NEXT: i32 4, label [[END]] +; CHECK-NEXT: i32 5, label [[BB1:%.*]] +; CHECK-NEXT: ] +; CHECK: bb1: +; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(1) [[GEP]], align 16 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[RETVAL_SROA_0_0_I569_PH:%.*]] = phi ptr addrspace(4) [ null, [[BB1]] ], [ [[TMP1]], [[BB0]] ], [ [[TMP1]], [[BB0]] ], [ [[TMP1]], [[BB0]] ] +; CHECK-NEXT: ret void +; +entry: + %loaded.ptr = load ptr addrspace(4), ptr addrspace(2) null, align 8 + br label %bb0 + +bb0: + %gep = getelementptr i64, ptr addrspace(4) %loaded.ptr, i64 3 + switch i32 0, label %end [ + i32 1, label %end + i32 4, label %end + i32 5, label %bb1 + ] + +bb1: + %0 = load double, ptr addrspace(4) %gep, align 16 + br label %end + +end: + %retval.sroa.0.0.i569.ph = phi ptr addrspace(4) [ null, %bb1 ], [ %gep, %bb0 ], [ %gep, %bb0 ], [ %gep, %bb0 ] + ret void +} + +declare void @uses_ptrs(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4)) + +; We shouldn't treat PHIs differently, even other users should have the same treatment. 
+; All occurences of %gep are replaced with an identical value. +define spir_kernel void @test_other() { +; CHECK-LABEL: @test_other( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOADED_PTR:%.*]] = load ptr addrspace(4), ptr addrspace(2) null, align 8 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[LOADED_PTR]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr addrspace(4) +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr addrspace(4) [[TMP1]], i64 3 +; CHECK-NEXT: call void @uses_ptrs(ptr addrspace(4) [[GEP]], ptr addrspace(4) [[GEP]], ptr addrspace(4) [[GEP]]) +; CHECK-NEXT: ret void +; +entry: + %loaded.ptr = load ptr addrspace(4), ptr addrspace(2) null, align 8 + %gep = getelementptr i64, ptr addrspace(4) %loaded.ptr, i64 3 + call void @uses_ptrs(ptr addrspace(4) %gep, ptr addrspace(4) %gep, ptr addrspace(4) %gep) + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll new file mode 100644 index 0000000000000..b7c773e92cb2f --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +define void @prefetch_shared_to_flat(ptr addrspace(3) %group.ptr) { +; CHECK-LABEL: define void @prefetch_shared_to_flat( +; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]]) { +; CHECK-NEXT: tail call void @llvm.prefetch.p3(ptr addrspace(3) [[GROUP_PTR]], i32 0, i32 0, i32 1) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + tail call void @llvm.prefetch.p4(ptr addrspace(4) %cast, i32 0, i32 0, i32 1) + ret void +} + +define void @prefetch_global_to_flat(ptr addrspace(1) %global.ptr) { +; 
CHECK-LABEL: define void @prefetch_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]]) { +; CHECK-NEXT: tail call void @llvm.prefetch.p1(ptr addrspace(1) [[GLOBAL_PTR]], i32 0, i32 0, i32 1) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + tail call void @llvm.prefetch.p4(ptr addrspace(4) %cast, i32 0, i32 0, i32 1) + ret void +} + +define void @prefetch_constant_to_flat(ptr addrspace(2) %const.ptr) { +; CHECK-LABEL: define void @prefetch_constant_to_flat( +; CHECK-SAME: ptr addrspace(2) [[CONST_PTR:%.*]]) { +; CHECK-NEXT: tail call void @llvm.prefetch.p2(ptr addrspace(2) [[CONST_PTR]], i32 0, i32 0, i32 1) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(2) %const.ptr to ptr addrspace(4) + tail call void @llvm.prefetch.p4(ptr addrspace(4) %cast, i32 0, i32 0, i32 1) + ret void +} + +define void @prefetch_flat_to_shared(ptr addrspace(4) %flat.ptr) { +; CHECK-LABEL: define void @prefetch_flat_to_shared( +; CHECK-SAME: ptr addrspace(4) [[FLAT_PTR:%.*]]) { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(4) [[FLAT_PTR]] to ptr addrspace(3) +; CHECK-NEXT: tail call void @llvm.prefetch.p3(ptr addrspace(3) [[CAST]], i32 0, i32 0, i32 1) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(4) %flat.ptr to ptr addrspace(3) + tail call void @llvm.prefetch.p3(ptr addrspace(3) %cast, i32 0, i32 0, i32 1) + ret void +} + +define void @prefetch_flat_to_global(ptr addrspace(4) %flat.ptr) { +; CHECK-LABEL: define void @prefetch_flat_to_global( +; CHECK-SAME: ptr addrspace(4) [[FLAT_PTR:%.*]]) { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(4) [[FLAT_PTR]] to ptr addrspace(1) +; CHECK-NEXT: tail call void @llvm.prefetch.p1(ptr addrspace(1) [[CAST]], i32 0, i32 0, i32 1) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(4) %flat.ptr to ptr addrspace(1) + tail call void @llvm.prefetch.p1(ptr addrspace(1) %cast, i32 0, i32 0, i32 1) + ret void +} diff 
--git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll new file mode 100644 index 0000000000000..296e3af86647e --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces -o - %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces -o - %s | FileCheck %s + +; Check that InferAddressSpaces's cloneInstructionWithNewAddressSpace() propagates +; the debug location to new addrspacecast instruction which casts `%p` in the following test. + +@c0 = addrspace(2) global ptr poison + +define float @generic_ptr_from_constant() !dbg !5 { +; CHECK-LABEL: define float @generic_ptr_from_constant( +; CHECK-SAME: ) !dbg [[DBG5:![0-9]+]] { +; CHECK-NEXT: [[P:%.*]] = load ptr addrspace(4), ptr addrspace(2) @c0, align 8, !dbg [[DBG8:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(1), !dbg [[DBG8]] +; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(1) [[TMP1]], align 4, !dbg [[DBG9:![0-9]+]] +; CHECK-NEXT: ret float [[V]], !dbg [[DBG10:![0-9]+]] +; + %p = load ptr addrspace(4), ptr addrspace(2) @c0, align 8, !dbg !8 + %v = load float, ptr addrspace(4) %p, align 4, !dbg !9 + ret float %v, !dbg !10 +} + +!llvm.dbg.cu = !{!0} +!llvm.debugify = !{!2, !3} +!llvm.module.flags = !{!4} + +; +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "temp.ll", directory: "/") +!2 = !{i32 3} +!3 = !{i32 0} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DISubprogram(name: "generic_ptr_from_constant", linkageName: "generic_ptr_from_constant", scope: null, file: !1, 
line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!6 = !DISubroutineType(types: !7) +!7 = !{} +!8 = !DILocation(line: 1, column: 1, scope: !5) +!9 = !DILocation(line: 2, column: 1, scope: !5) +!10 = !DILocation(line: 3, column: 1, scope: !5) +;. +; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: [[META1:![0-9]+]], producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) +; CHECK: [[META1]] = !DIFile(filename: "temp.ll", directory: {{.*}}) +; CHECK: [[DBG5]] = distinct !DISubprogram(name: "generic_ptr_from_constant", linkageName: "generic_ptr_from_constant", scope: null, file: [[META1]], line: 1, type: [[META6:![0-9]+]], scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]]) +; CHECK: [[META6]] = !DISubroutineType(types: [[META7:![0-9]+]]) +; CHECK: [[META7]] = !{} +; CHECK: [[DBG8]] = !DILocation(line: 1, column: 1, scope: [[DBG5]]) +; CHECK: [[DBG9]] = !DILocation(line: 2, column: 1, scope: [[DBG5]]) +; CHECK: [[DBG10]] = !DILocation(line: 3, column: 1, scope: [[DBG5]]) +;. diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll new file mode 100644 index 0000000000000..3b5d4b7adc3a7 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +%0 = type { i8, i8, i8 } + +; Make sure there is only one addrspacecast. The original cast should +; not be cloned to satisfy the second user. 
+define void @bar(ptr addrspace(1) %orig.ptr) { +; CHECK-LABEL: @bar( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[ORIG_CAST:%.*]] = addrspacecast ptr addrspace(1) [[ORIG_PTR:%.*]] to ptr addrspace(4) +; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr addrspace(4) [[ORIG_CAST]], i64 0, i32 1 +; CHECK-NEXT: call void @foo(ptr addrspace(4) [[GEP0]]) +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[TMP0]], ptr addrspace(4) [[ORIG_CAST]], i64 0, i32 2 +; CHECK-NEXT: call void @foo(ptr addrspace(4) [[GEP1]]) +; CHECK-NEXT: ret void +; +bb: + %orig.cast = addrspacecast ptr addrspace(1) %orig.ptr to ptr addrspace(4) + %gep0 = getelementptr inbounds %0, ptr addrspace(4) %orig.cast, i64 0, i32 1 + call void @foo(ptr addrspace(4) %gep0) + %gep1 = getelementptr inbounds %0, ptr addrspace(4) %orig.cast, i64 0, i32 2 + call void @foo(ptr addrspace(4) %gep1) + ret void +} + +declare void @foo(ptr addrspace(4)) diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll new file mode 100644 index 0000000000000..ec5c31f32d513 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll @@ -0,0 +1,29 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=spirv32-- -S -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -mtriple=spirv64-- -S -passes=infer-address-spaces %s | FileCheck %s + +define spir_kernel void @phi_self(ptr addrspace(1) %arg) { +; CHECK-LABEL: @phi_self( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi ptr addrspace(1) [ [[I]], [[LOOP]] ], [ [[ARG:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[I1:%.*]] = load i8, ptr addrspace(1) [[I]], align 1 +; CHECK-NEXT: [[I2:%.*]] = icmp eq i8 [[I1]], 0 +; CHECK-NEXT: br i1 [[I2]], label [[LOOP]], label [[RET:%.*]] +; CHECK: ret: +; CHECK-NEXT: ret void +; +entry: + %cast = addrspacecast ptr addrspace(1) %arg to ptr 
addrspace(4) + br label %loop + +loop: + %i = phi ptr addrspace(4) [%i, %loop], [%cast, %entry] + %i1 = load i8, ptr addrspace(4) %i, align 1 + %i2 = icmp eq i8 %i1, 0 + br i1 %i2, label %loop, label %ret + +ret: + ret void +} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll new file mode 100644 index 0000000000000..b835a008a91e0 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll @@ -0,0 +1,187 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s + +; Check that volatile users of addrspacecast are not replaced. + +define spir_kernel void @volatile_load_flat_from_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_load_flat_from_global( +; CHECK-SAME: ptr addrspace(1) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[INPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(4) [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(1) %output to ptr addrspace(4) + %val = load volatile i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @volatile_load_flat_from_constant(ptr addrspace(2) nocapture %input, ptr addrspace(1) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_load_flat_from_constant( +; CHECK-SAME: ptr addrspace(2) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture 
[[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(2) [[INPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(4) [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(2) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(1) %output to ptr addrspace(4) + %val = load volatile i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @volatile_load_flat_from_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_load_flat_from_group( +; CHECK-SAME: ptr addrspace(3) nocapture [[INPUT:%.*]], ptr addrspace(3) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[INPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(4) [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(3) [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(3) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(3) %output to ptr addrspace(4) + %val = load volatile i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @volatile_load_flat_from_private(ptr nocapture %input, ptr nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_load_flat_from_private( +; CHECK-SAME: ptr nocapture [[INPUT:%.*]], ptr nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(4) [[TMP0]], align 4 +; CHECK-NEXT: store i32 [[VAL]], ptr [[OUTPUT]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr %input to ptr addrspace(4) + %tmp1 = 
addrspacecast ptr %output to ptr addrspace(4) + %val = load volatile i32, ptr addrspace(4) %tmp0, align 4 + store i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @volatile_store_flat_to_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_store_flat_to_global( +; CHECK-SAME: ptr addrspace(1) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[INPUT]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL]], ptr addrspace(4) [[TMP1]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(1) %output to ptr addrspace(4) + %val = load i32, ptr addrspace(4) %tmp0, align 4 + store volatile i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @volatile_store_flat_to_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_store_flat_to_group( +; CHECK-SAME: ptr addrspace(3) nocapture [[INPUT:%.*]], ptr addrspace(3) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(3) [[OUTPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(3) [[INPUT]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL]], ptr addrspace(4) [[TMP1]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr addrspace(3) %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr addrspace(3) %output to ptr addrspace(4) + %val = load i32, ptr addrspace(4) %tmp0, align 4 + store volatile i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define spir_kernel void @volatile_store_flat_to_private(ptr nocapture %input, ptr nocapture %output) #0 { +; CHECK-LABEL: 
define spir_kernel void @volatile_store_flat_to_private( +; CHECK-SAME: ptr nocapture [[INPUT:%.*]], ptr nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(4) +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[INPUT]], align 4 +; CHECK-NEXT: store volatile i32 [[VAL]], ptr addrspace(4) [[TMP1]], align 4 +; CHECK-NEXT: ret void +; + %tmp0 = addrspacecast ptr %input to ptr addrspace(4) + %tmp1 = addrspacecast ptr %output to ptr addrspace(4) + %val = load i32, ptr addrspace(4) %tmp0, align 4 + store volatile i32 %val, ptr addrspace(4) %tmp1, align 4 + ret void +} + +define i32 @volatile_atomicrmw_add_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 { +; CHECK-LABEL: define i32 @volatile_atomicrmw_add_group_to_flat( +; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[GROUP_PTR]] to ptr addrspace(4) +; CHECK-NEXT: [[RET:%.*]] = atomicrmw volatile add ptr addrspace(4) [[CAST]], i32 [[Y]] seq_cst, align 4 +; CHECK-NEXT: ret i32 [[RET]] +; + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + %ret = atomicrmw volatile add ptr addrspace(4) %cast, i32 %y seq_cst + ret i32 %ret +} + +define i32 @volatile_atomicrmw_add_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 { +; CHECK-LABEL: define i32 @volatile_atomicrmw_add_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL_PTR]] to ptr addrspace(4) +; CHECK-NEXT: [[RET:%.*]] = atomicrmw volatile add ptr addrspace(4) [[CAST]], i32 [[Y]] seq_cst, align 4 +; CHECK-NEXT: ret i32 [[RET]] +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + %ret = atomicrmw volatile add ptr addrspace(4) %cast, i32 %y seq_cst + ret i32 %ret +} + +define { i32, i1 } @volatile_cmpxchg_global_to_flat(ptr addrspace(1) %global.ptr, i32 %cmp, i32 
%val) #0 { +; CHECK-LABEL: define { i32, i1 } @volatile_cmpxchg_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL_PTR]] to ptr addrspace(4) +; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr addrspace(4) [[CAST]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 +; CHECK-NEXT: ret { i32, i1 } [[RET]] +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + %ret = cmpxchg volatile ptr addrspace(4) %cast, i32 %cmp, i32 %val seq_cst monotonic + ret { i32, i1 } %ret +} + +define { i32, i1 } @volatile_cmpxchg_group_to_flat(ptr addrspace(3) %group.ptr, i32 %cmp, i32 %val) #0 { +; CHECK-LABEL: define { i32, i1 } @volatile_cmpxchg_group_to_flat( +; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[GROUP_PTR]] to ptr addrspace(4) +; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr addrspace(4) [[CAST]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 +; CHECK-NEXT: ret { i32, i1 } [[RET]] +; + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + %ret = cmpxchg volatile ptr addrspace(4) %cast, i32 %cmp, i32 %val seq_cst monotonic + ret { i32, i1 } %ret +} + +define spir_kernel void @volatile_memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_memset_group_to_flat( +; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[GROUP_PTR]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 [[CAST]], i8 4, i64 32, i1 true) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) + call void @llvm.memset.p0.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 32, i1 true) + ret void +} + 
+define spir_kernel void @volatile_memset_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 { +; CHECK-LABEL: define spir_kernel void @volatile_memset_global_to_flat( +; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL_PTR]] to ptr addrspace(4) +; CHECK-NEXT: call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 [[CAST]], i8 4, i64 32, i1 true) +; CHECK-NEXT: ret void +; + %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) + call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 32, i1 true) + ret void +} + +declare void @llvm.memset.p4.i64(ptr addrspace(4) nocapture writeonly, i8, i64, i1) #1 + +attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind } From dc1a5f5d2e18b408fae3e04091dd653394695368 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 2 Oct 2024 18:27:24 +0100 Subject: [PATCH 02/14] Fix formatting. --- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 47 ++++++++++---------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 91bcd68813fc5..3caf000f17117 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -97,17 +97,15 @@ SPIRVTargetMachine::SPIRVTargetMachine(const Target &T, const Triple &TT, setRequiresStructuredCFG(false); } -namespace { - enum AddressSpace { - Function = storageClassToAddressSpace(SPIRV::StorageClass::Function), - CrossWorkgroup = - storageClassToAddressSpace(SPIRV::StorageClass::CrossWorkgroup), - UniformConstant = - storageClassToAddressSpace(SPIRV::StorageClass::UniformConstant), - Workgroup = storageClassToAddressSpace(SPIRV::StorageClass::Workgroup), - Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic) - }; -} +enum AddressSpace { + Function = 
storageClassToAddressSpace(SPIRV::StorageClass::Function), + CrossWorkgroup = + storageClassToAddressSpace(SPIRV::StorageClass::CrossWorkgroup), + UniformConstant = + storageClassToAddressSpace(SPIRV::StorageClass::UniformConstant), + Workgroup = storageClassToAddressSpace(SPIRV::StorageClass::Workgroup), + Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic) +}; unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { const auto *LD = dyn_cast(V); @@ -148,9 +146,10 @@ SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const { Value *Ptr; if (getTargetTriple().getVendor() == Triple::VendorType::AMD && match( - const_cast(V), - m_c_And(m_Not(m_Intrinsic(m_Value(Ptr))), - m_Not(m_Intrinsic(m_Deferred(Ptr)))))) + const_cast(V), + m_c_And(m_Not(m_Intrinsic(m_Value(Ptr))), + m_Not(m_Intrinsic( + m_Deferred(Ptr)))))) return std::pair(Ptr, AddressSpace::CrossWorkgroup); return std::pair(nullptr, UINT32_MAX); @@ -165,19 +164,19 @@ bool SPIRVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, } void SPIRVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { - PB.registerCGSCCOptimizerLateEPCallback([](CGSCCPassManager &PM, - OptimizationLevel Level) { - if (Level == OptimizationLevel::O0) - return; + PB.registerCGSCCOptimizerLateEPCallback( + [](CGSCCPassManager &PM, OptimizationLevel Level) { + if (Level == OptimizationLevel::O0) + return; - FunctionPassManager FPM; + FunctionPassManager FPM; - // Add infer address spaces pass to the opt pipeline after inlining - // but before SROA to increase SROA opportunities. - FPM.addPass(InferAddressSpacesPass(AddressSpace::Generic)); + // Add infer address spaces pass to the opt pipeline after inlining + // but before SROA to increase SROA opportunities. 
+ FPM.addPass(InferAddressSpacesPass(AddressSpace::Generic)); - PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); - }); + PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); + }); } namespace { From 31a5ebe2248059c496043f234d994073f9066eb4 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sat, 12 Oct 2024 02:54:58 +0300 Subject: [PATCH 03/14] Reduce set of tests. --- .../SPIRV/infer-addrspacecast.ll | 65 ------ .../SPIRV/infer-getelementptr.ll | 108 ---------- .../SPIRV/insert-pos-assert.ll | 158 --------------- .../InferAddressSpaces/SPIRV/is.constant.ll | 57 ------ .../SPIRV/mem-intrinsics.ll | 145 -------------- .../SPIRV/multiple-uses-of-val.ll | 70 ------- .../InferAddressSpaces/SPIRV/prefetch.ll | 60 ------ .../preserving-debugloc-addrspacecast.ll | 48 ----- .../SPIRV/redundant-addrspacecast.ll | 28 --- .../InferAddressSpaces/SPIRV/self-phi.ll | 29 --- .../InferAddressSpaces/SPIRV/volatile.ll | 187 ------------------ 11 files changed, 955 deletions(-) delete mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll delete mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll delete mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll delete mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll delete mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll delete mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll delete mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll delete mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll delete mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll delete mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll delete mode 100644 llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll diff --git 
a/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll deleted file mode 100644 index 4e64ec7174017..0000000000000 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-addrspacecast.ll +++ /dev/null @@ -1,65 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s -; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s - -; Test that pure addrspacecast instructions not directly connected to -; a memory operation are inferred. - -define void @addrspacecast_gep_addrspacecast(ptr addrspace(3) %ptr) { -; CHECK-LABEL: define void @addrspacecast_gep_addrspacecast( -; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(3) [[PTR]], i64 9 -; CHECK-NEXT: store i32 8, ptr addrspace(3) [[GEP0]], align 8 -; CHECK-NEXT: ret void -; - %asc0 = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) - %gep0 = getelementptr i32, ptr addrspace(4) %asc0, i64 9 - %asc1 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) - store i32 8, ptr addrspace(3) %asc1, align 8 - ret void -} - -define void @addrspacecast_different_pointee_type(ptr addrspace(3) %ptr) { -; CHECK-LABEL: define void @addrspacecast_different_pointee_type( -; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(3) [[PTR]], i64 9 -; CHECK-NEXT: store i8 8, ptr addrspace(3) [[GEP0]], align 8 -; CHECK-NEXT: ret void -; - %asc0 = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) - %gep0 = getelementptr i32, ptr addrspace(4) %asc0, i64 9 - %asc1 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) - store i8 8, ptr addrspace(3) %asc1, align 8 - ret void -} - -define void @addrspacecast_to_memory(ptr addrspace(3) %ptr) { -; CHECK-LABEL: define void 
@addrspacecast_to_memory( -; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(3) [[PTR]], i64 9 -; CHECK-NEXT: store volatile ptr addrspace(3) [[GEP0]], ptr addrspace(1) undef, align -; CHECK-NEXT: ret void -; - %asc0 = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) - %gep0 = getelementptr i32, ptr addrspace(4) %asc0, i64 9 - %asc1 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) - store volatile ptr addrspace(3) %asc1, ptr addrspace(1) undef - ret void -} - -define void @multiuse_addrspacecast_gep_addrspacecast(ptr addrspace(3) %ptr) { -; CHECK-LABEL: define void @multiuse_addrspacecast_gep_addrspacecast( -; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { -; CHECK-NEXT: [[ASC0:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4) -; CHECK-NEXT: store volatile ptr addrspace(4) [[ASC0]], ptr addrspace(1) undef, align -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i32, ptr addrspace(3) [[PTR]], i64 9 -; CHECK-NEXT: store i32 8, ptr addrspace(3) [[GEP0]], align 8 -; CHECK-NEXT: ret void -; - %asc0 = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) - store volatile ptr addrspace(4) %asc0, ptr addrspace(1) undef - %gep0 = getelementptr i32, ptr addrspace(4) %asc0, i64 9 - %asc1 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) - store i32 8, ptr addrspace(3) %asc1, align 8 - ret void -} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll deleted file mode 100644 index 56412e50ed5d2..0000000000000 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/infer-getelementptr.ll +++ /dev/null @@ -1,108 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s -; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s - -; Test that pure GetElementPtr 
instructions not directly connected to -; a memory operation are inferred. - -@lds = internal unnamed_addr addrspace(3) global [648 x double] undef, align 8 - -define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { -; CHECK-LABEL: @simplified_constexpr_gep_addrspacecast( -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr inbounds ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX0:%.*]] -; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP0]], align 8 -; CHECK-NEXT: ret void -; - %gep0 = getelementptr inbounds double, ptr addrspace(4) addrspacecast (ptr addrspace(3) getelementptr inbounds ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384) to ptr addrspace(4)), i64 %idx0 - %asc = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) - store double 1.000000e+00, ptr addrspace(3) %asc, align 8 - ret void -} - -define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { -; CHECK-LABEL: @constexpr_gep_addrspacecast( -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX0:%.*]] -; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP0]], align 8 -; CHECK-NEXT: ret void -; - %gep0 = getelementptr inbounds double, ptr addrspace(4) getelementptr ([648 x double], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i64 0, i64 384), i64 %idx0 - %asc = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) - store double 1.0, ptr addrspace(3) %asc, align 8 - ret void -} - -define void @constexpr_gep_gep_addrspacecast(i64 %idx0, i64 %idx1) { -; CHECK-LABEL: @constexpr_gep_gep_addrspacecast( -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX0:%.*]] -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds double, ptr addrspace(3) [[GEP0]], i64 
[[IDX1:%.*]] -; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP1]], align 8 -; CHECK-NEXT: ret void -; - %gep0 = getelementptr inbounds double, ptr addrspace(4) getelementptr ([648 x double], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i64 0, i64 384), i64 %idx0 - %gep1 = getelementptr inbounds double, ptr addrspace(4) %gep0, i64 %idx1 - %asc = addrspacecast ptr addrspace(4) %gep1 to ptr addrspace(3) - store double 1.0, ptr addrspace(3) %asc, align 8 - ret void -} - -; Don't crash -define spir_kernel void @vector_gep(<4 x ptr addrspace(3)> %array) nounwind { -; CHECK-LABEL: @vector_gep( -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast <4 x ptr addrspace(3)> [[ARRAY:%.*]] to <4 x ptr addrspace(4)> -; CHECK-NEXT: [[P:%.*]] = getelementptr [1024 x i32], <4 x ptr addrspace(4)> [[CAST]], <4 x i16> zeroinitializer, <4 x i16> -; CHECK-NEXT: [[P0:%.*]] = extractelement <4 x ptr addrspace(4)> [[P]], i32 0 -; CHECK-NEXT: [[P1:%.*]] = extractelement <4 x ptr addrspace(4)> [[P]], i32 1 -; CHECK-NEXT: [[P2:%.*]] = extractelement <4 x ptr addrspace(4)> [[P]], i32 2 -; CHECK-NEXT: [[P3:%.*]] = extractelement <4 x ptr addrspace(4)> [[P]], i32 3 -; CHECK-NEXT: store i32 99, ptr addrspace(4) [[P0]], align 4 -; CHECK-NEXT: store i32 99, ptr addrspace(4) [[P1]], align 4 -; CHECK-NEXT: store i32 99, ptr addrspace(4) [[P2]], align 4 -; CHECK-NEXT: store i32 99, ptr addrspace(4) [[P3]], align 4 -; CHECK-NEXT: ret void -; - %cast = addrspacecast <4 x ptr addrspace(3)> %array to <4 x ptr addrspace(4)> - %p = getelementptr [1024 x i32], <4 x ptr addrspace(4)> %cast, <4 x i16> zeroinitializer, <4 x i16> - %p0 = extractelement <4 x ptr addrspace(4)> %p, i32 0 - %p1 = extractelement <4 x ptr addrspace(4)> %p, i32 1 - %p2 = extractelement <4 x ptr addrspace(4)> %p, i32 2 - %p3 = extractelement <4 x ptr addrspace(4)> %p, i32 3 - store i32 99, ptr addrspace(4) %p0 - store i32 99, ptr addrspace(4) %p1 - store i32 99, ptr addrspace(4) %p2 - store i32 99, ptr 
addrspace(4) %p3 - ret void -} - -define void @repeated_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { -; CHECK-LABEL: @repeated_constexpr_gep_addrspacecast( -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX0:%.*]] -; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP0]], align 8 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds double, ptr addrspace(3) getelementptr ([648 x double], ptr addrspace(3) @lds, i64 0, i64 384), i64 [[IDX1:%.*]] -; CHECK-NEXT: store double 1.000000e+00, ptr addrspace(3) [[GEP1]], align 8 -; CHECK-NEXT: ret void -; - %gep0 = getelementptr inbounds double, ptr addrspace(4) getelementptr ([648 x double], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i64 0, i64 384), i64 %idx0 - %asc0 = addrspacecast ptr addrspace(4) %gep0 to ptr addrspace(3) - store double 1.0, ptr addrspace(3) %asc0, align 8 - - %gep1 = getelementptr inbounds double, ptr addrspace(4) getelementptr ([648 x double], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i64 0, i64 384), i64 %idx1 - %asc1 = addrspacecast ptr addrspace(4) %gep1 to ptr addrspace(3) - store double 1.0, ptr addrspace(3) %asc1, align 8 - - ret void -} - -define void @unorder_constexpr_gep_bitcast() { -; CHECK-LABEL: @unorder_constexpr_gep_bitcast( -; CHECK-NEXT: [[X0:%.*]] = load i32, ptr addrspace(3) @lds, align 4 -; CHECK-NEXT: [[X1:%.*]] = load i32, ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @lds, i32 1), align 4 -; CHECK-NEXT: call void @use(i32 [[X0]], i32 [[X1]]) -; CHECK-NEXT: ret void -; - %x0 = load i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), align 4 - %x1 = load i32, ptr addrspace(4) getelementptr (i32, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds to ptr addrspace(4)), i32 1), align 4 - call void @use(i32 %x0, i32 %x1) - ret void -} - -declare void 
@use(i32, i32) diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll deleted file mode 100644 index f736579c1765f..0000000000000 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/insert-pos-assert.ll +++ /dev/null @@ -1,158 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s --check-prefix=SPIRV32 -; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s --check-prefix=SPIRV64 - -; Addrspacecasts or bitcasts must be inserted after the instructions that define their uses. - -%struct.s0 = type { ptr addrspace(4), i32 } -%struct.s1 = type { %struct.s0 } - -@global0 = protected addrspace(2) externally_initialized global %struct.s1 zeroinitializer - -declare i32 @func(ptr %arg) - -define i32 @addrspacecast_insert_pos_assert() { -; CHECK-LABEL: @addrspacecast_insert_pos_assert( -; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(2) @global0, align 4 -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[LOAD0]] to ptr addrspace(1) -; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr addrspace(4) -; CHECK-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ALLOCA]], align 4 -; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD1]] to i64 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[SEXT]] -; CHECK-NEXT: [[CALL:%.*]] = call i32 @func(ptr addrspace(4) [[GEP]]) -; CHECK-NEXT: ret i32 [[CALL]] -; -; SPIRV32-LABEL: @addrspacecast_insert_pos_assert( -; SPIRV32-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4 -; SPIRV32-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(2) @global0, align 4 -; SPIRV32-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[LOAD0]] to ptr addrspace(1) -; SPIRV32-NEXT: [[TMP2:%.*]] = addrspacecast 
ptr addrspace(1) [[TMP1]] to ptr addrspace(4) -; SPIRV32-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ALLOCA]], align 4 -; SPIRV32-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD1]] to i64 -; SPIRV32-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[SEXT]] -; SPIRV32-NEXT: [[CALL:%.*]] = call i32 @func(ptr addrspace(4) [[GEP]]) -; SPIRV32-NEXT: ret i32 [[CALL]] -; -; SPIRV64-LABEL: @addrspacecast_insert_pos_assert( -; SPIRV64-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4 -; SPIRV64-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(2) @global0, align 8 -; SPIRV64-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[LOAD0]] to ptr addrspace(1) -; SPIRV64-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[TMP1]] to ptr addrspace(4) -; SPIRV64-NEXT: [[LOAD1:%.*]] = load i32, ptr [[ALLOCA]], align 4 -; SPIRV64-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD1]] to i64 -; SPIRV64-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP2]], i64 [[SEXT]] -; SPIRV64-NEXT: [[CALL:%.*]] = call i32 @func(ptr addrspace(4) [[GEP]]) -; SPIRV64-NEXT: ret i32 [[CALL]] -; - %alloca = alloca i32, align 4 - %cast = addrspacecast ptr %alloca to ptr addrspace(4) - %load0 = load ptr addrspace(4), ptr addrspace(2) @global0 - %load1 = load i32, ptr addrspace(4) %cast - %sext = sext i32 %load1 to i64 - %gep = getelementptr inbounds i32, ptr addrspace(4) %load0, i64 %sext - %call = call i32 @func(ptr addrspace(4) %gep) - ret i32 %call -} - -define void @bitcast_insert_pos_assert_1() { -; CHECK-LABEL: @bitcast_insert_pos_assert_1( -; CHECK-NEXT: bb.0: -; CHECK-NEXT: [[ASC0:%.*]] = addrspacecast ptr undef to ptr addrspace(4) -; CHECK-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 -; CHECK-NEXT: br label [[BB_1:%.*]] -; CHECK: bb.1: -; CHECK-NEXT: br i1 undef, label [[BB_2:%.*]], label [[BB_3:%.*]] -; CHECK: bb.2: -; CHECK-NEXT: [[PTI1:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 -; CHECK-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI1]] to ptr 
addrspace(4) -; CHECK-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITP0]], align 8 -; CHECK-NEXT: br label [[BB_3]] -; CHECK: bb.3: -; CHECK-NEXT: ret void -; -; SPIRV32-LABEL: @bitcast_insert_pos_assert_1( -; SPIRV32-NEXT: bb.0: -; SPIRV32-NEXT: [[ASC0:%.*]] = addrspacecast ptr undef to ptr addrspace(4) -; SPIRV32-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 -; SPIRV32-NEXT: br label [[BB_1:%.*]] -; SPIRV32: bb.1: -; SPIRV32-NEXT: br i1 undef, label [[BB_2:%.*]], label [[BB_3:%.*]] -; SPIRV32: bb.2: -; SPIRV32-NEXT: [[PTI1:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 -; SPIRV32-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI1]] to ptr addrspace(4) -; SPIRV32-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ITP0]], align 8 -; SPIRV32-NEXT: br label [[BB_3]] -; SPIRV32: bb.3: -; SPIRV32-NEXT: ret void -; -; SPIRV64-LABEL: @bitcast_insert_pos_assert_1( -; SPIRV64-NEXT: bb.0: -; SPIRV64-NEXT: [[ASC0:%.*]] = addrspacecast ptr undef to ptr addrspace(4) -; SPIRV64-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 -; SPIRV64-NEXT: br label [[BB_1:%.*]] -; SPIRV64: bb.1: -; SPIRV64-NEXT: br i1 undef, label [[BB_2:%.*]], label [[BB_3:%.*]] -; SPIRV64: bb.2: -; SPIRV64-NEXT: [[LOAD0:%.*]] = load ptr addrspace(4), ptr undef, align 8 -; SPIRV64-NEXT: br label [[BB_3]] -; SPIRV64: bb.3: -; SPIRV64-NEXT: ret void -; -bb.0: - %asc0 = addrspacecast ptr undef to ptr addrspace(4) - %pti0 = ptrtoint ptr addrspace(4) %asc0 to i64 - br label %bb.1 - -bb.1: - br i1 undef, label %bb.2, label %bb.3 - -bb.2: - %pti1 = ptrtoint ptr addrspace(4) %asc0 to i64 - %itp0 = inttoptr i64 %pti1 to ptr addrspace(4) - %load0 = load ptr addrspace(4), ptr addrspace(4) %itp0, align 8 - br label %bb.3 - -bb.3: - ret void -} - -define void @bitcast_insert_pos_assert_2() { -; CHECK-LABEL: @bitcast_insert_pos_assert_2( -; CHECK-NEXT: [[ALLOCA0:%.*]] = alloca [[STRUCT_S1:%.*]], align 16 -; CHECK-NEXT: [[ASC0:%.*]] = addrspacecast ptr [[ALLOCA0]] to 
ptr addrspace(4) -; CHECK-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 -; CHECK-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI0]] to ptr addrspace(4) -; CHECK-NEXT: [[ITP1:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 -; CHECK-NEXT: [[ITP2:%.*]] = inttoptr i64 [[ITP1]] to ptr addrspace(4) -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr i64, ptr addrspace(4) [[ITP2]], i64 1 -; CHECK-NEXT: ret void -; -; SPIRV32-LABEL: @bitcast_insert_pos_assert_2( -; SPIRV32-NEXT: [[ALLOCA0:%.*]] = alloca [[STRUCT_S1:%.*]], align 16 -; SPIRV32-NEXT: [[ASC0:%.*]] = addrspacecast ptr [[ALLOCA0]] to ptr addrspace(4) -; SPIRV32-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 -; SPIRV32-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI0]] to ptr addrspace(4) -; SPIRV32-NEXT: [[ITP1:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 -; SPIRV32-NEXT: [[ITP2:%.*]] = inttoptr i64 [[ITP1]] to ptr addrspace(4) -; SPIRV32-NEXT: [[GEP0:%.*]] = getelementptr i64, ptr addrspace(4) [[ITP2]], i64 1 -; SPIRV32-NEXT: ret void -; -; SPIRV64-LABEL: @bitcast_insert_pos_assert_2( -; SPIRV64-NEXT: [[ALLOCA0:%.*]] = alloca [[STRUCT_S1:%.*]], align 16 -; SPIRV64-NEXT: [[ASC0:%.*]] = addrspacecast ptr [[ALLOCA0]] to ptr addrspace(4) -; SPIRV64-NEXT: [[PTI0:%.*]] = ptrtoint ptr addrspace(4) [[ASC0]] to i64 -; SPIRV64-NEXT: [[ITP0:%.*]] = inttoptr i64 [[PTI0]] to ptr addrspace(4) -; SPIRV64-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ALLOCA0]] to ptr addrspace(4) -; SPIRV64-NEXT: [[GEP0:%.*]] = getelementptr i64, ptr addrspace(4) [[TMP1]], i64 1 -; SPIRV64-NEXT: ret void -; - %alloca0 = alloca %struct.s1, align 16 - %asc0 = addrspacecast ptr %alloca0 to ptr addrspace(4) - %pti0 = ptrtoint ptr addrspace(4) %asc0 to i64 - %itp0 = inttoptr i64 %pti0 to ptr addrspace(4) - %itp1 = ptrtoint ptr addrspace(4) %asc0 to i64 - %itp2 = inttoptr i64 %itp1 to ptr addrspace(4) - %gep0 = getelementptr i64, ptr addrspace(4) %itp2, i64 1 - ret void -} diff --git 
a/llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll deleted file mode 100644 index d6a58d2fccde0..0000000000000 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/is.constant.ll +++ /dev/null @@ -1,57 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s -; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s - -define i1 @is_constant_global_to_flat(ptr addrspace(1) %ptr) { -; CHECK-LABEL: define i1 @is_constant_global_to_flat( -; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { -; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.p1(ptr addrspace(1) [[PTR]]) -; CHECK-NEXT: ret i1 [[RET]] -; - %cast = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(4) - %ret = call i1 @llvm.is.constant.p4(ptr addrspace(4) %cast) - ret i1 %ret -} - -define i1 @is_constant_local_to_flat(ptr addrspace(3) %ptr) { -; CHECK-LABEL: define i1 @is_constant_local_to_flat( -; CHECK-SAME: ptr addrspace(3) [[PTR:%.*]]) { -; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.p3(ptr addrspace(3) [[PTR]]) -; CHECK-NEXT: ret i1 [[RET]] -; - %cast = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(4) - %ret = call i1 @llvm.is.constant.p4(ptr addrspace(4) %cast) - ret i1 %ret -} - -define i1 @is_constant_private_to_flat(ptr %ptr) { -; CHECK-LABEL: define i1 @is_constant_private_to_flat( -; CHECK-SAME: ptr [[PTR:%.*]]) { -; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.p0(ptr [[PTR]]) -; CHECK-NEXT: ret i1 [[RET]] -; - %cast = addrspacecast ptr %ptr to ptr addrspace(4) - %ret = call i1 @llvm.is.constant.p4(ptr addrspace(4) %cast) - ret i1 %ret -} - -define i1 @is_constant_private_to_flat_v2(<2 x ptr> %ptr) { -; CHECK-LABEL: define i1 @is_constant_private_to_flat_v2( -; CHECK-SAME: <2 x ptr> [[PTR:%.*]]) { -; CHECK-NEXT: [[RET:%.*]] = call i1 
@llvm.is.constant.v2p0(<2 x ptr> [[PTR]]) -; CHECK-NEXT: ret i1 [[RET]] -; - %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(4)> - %ret = call i1 @llvm.is.constant.v2p4(<2 x ptr addrspace(4)> %cast) - ret i1 %ret -} - -define i1 @is_constant_i32(i32 %val) { -; CHECK-LABEL: define i1 @is_constant_i32( -; CHECK-SAME: i32 [[VAL:%.*]]) { -; CHECK-NEXT: [[RET:%.*]] = call i1 @llvm.is.constant.i32(i32 [[VAL]]) -; CHECK-NEXT: ret i1 [[RET]] -; - %ret = call i1 @llvm.is.constant.i32(i32 %val) - ret i1 %ret -} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll deleted file mode 100644 index fd60c307a35fc..0000000000000 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/mem-intrinsics.ll +++ /dev/null @@ -1,145 +0,0 @@ -; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s -; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s - -; CHECK-LABEL: @memset_group_to_flat( -; CHECK: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 %group.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 -define spir_kernel void @memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 { - %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) - call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 - ret void -} - -; CHECK-LABEL: @memset_global_to_flat( -; CHECK: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 %global.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 -define spir_kernel void @memset_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 { - %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) - call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 - ret void -} - -; CHECK-LABEL: 
@memset_group_to_flat_no_md( -; CHECK: call void @llvm.memset.p3.i64(ptr addrspace(3) align 4 %group.ptr, i8 4, i64 %size, i1 false){{$}} -define spir_kernel void @memset_group_to_flat_no_md(ptr addrspace(3) %group.ptr, i64 %size) #0 { - %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) - call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 %size, i1 false) - ret void -} - -; CHECK-LABEL: @memset_global_to_flat_no_md( -; CHECK: call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 %global.ptr, i8 4, i64 %size, i1 false){{$}} -define spir_kernel void @memset_global_to_flat_no_md(ptr addrspace(1) %global.ptr, i64 %size) #0 { - %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) - call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 %size, i1 false) - ret void -} - -; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group( -; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 -define spir_kernel void @memcpy_flat_to_flat_replace_src_with_group(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { - %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) - call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 - ret void -} - -; CHECK-LABEL: @memcpy_inline_flat_to_flat_replace_src_with_group( -; CHECK: call void @llvm.memcpy.inline.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 42, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 -define spir_kernel void @memcpy_inline_flat_to_flat_replace_src_with_group(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr) #0 { - %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) - call void 
@llvm.memcpy.inline.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 42, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 - ret void -} - -; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group( -; CHECK: call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) align 4 %dest.group.ptr, ptr addrspace(4) align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 -define spir_kernel void @memcpy_flat_to_flat_replace_dest_with_group(ptr addrspace(3) %dest.group.ptr, ptr addrspace(4) %src.ptr, i64 %size) #0 { - %cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr addrspace(4) - call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %cast.dest, ptr addrspace(4) align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 - ret void -} - -; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group( -; CHECK: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 %src.group.ptr, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 -define spir_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(ptr addrspace(3) %dest.group.ptr, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { - %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) - %cast.dest = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) - call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %cast.dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 - ret void -} - -; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global( -; CHECK: call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) align 4 %dest.group.ptr, ptr addrspace(1) align 4 %src.global.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 -define spir_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(ptr addrspace(3) %dest.group.ptr, ptr addrspace(1) %src.global.ptr, i64 %size) #0 { 
- %cast.src = addrspacecast ptr addrspace(1) %src.global.ptr to ptr addrspace(4) - %cast.dest = addrspacecast ptr addrspace(3) %dest.group.ptr to ptr addrspace(4) - call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %cast.dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 - ret void -} - -; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global( -; CHECK: call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %dest.global.ptr, ptr addrspace(3) align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 -define spir_kernel void @memcpy_group_to_flat_replace_dest_global(ptr addrspace(1) %dest.global.ptr, ptr addrspace(3) %src.group.ptr, i32 %size) #0 { - %cast.dest = addrspacecast ptr addrspace(1) %dest.global.ptr to ptr addrspace(4) - call void @llvm.memcpy.p4.p3.i32(ptr addrspace(4) align 4 %cast.dest, ptr addrspace(3) align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 - ret void -} - -; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct( -; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !8 -define spir_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { - %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) - call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa.struct !8 - ret void -} - -; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md( -; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}} -define spir_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { - 
%cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) - call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false) - ret void -} - -; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md( -; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest0, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}} -; CHECK: call void @llvm.memcpy.p4.p3.i64(ptr addrspace(4) align 4 %dest1, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false){{$}} -define spir_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(ptr addrspace(4) %dest0, ptr addrspace(4) %dest1, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { - %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) - call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest0, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false) - call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %dest1, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false) - ret void -} - -; Check for iterator problems if the pointer has 2 uses in the same call -; CHECK-LABEL: @memcpy_group_flat_to_flat_self( -; CHECK: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 4 %group.ptr, ptr addrspace(3) align 4 %group.ptr, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 -define spir_kernel void @memcpy_group_flat_to_flat_self(ptr addrspace(3) %group.ptr) #0 { - %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) - call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) align 4 %cast, ptr addrspace(4) align 4 %cast, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 - ret void -} -; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group( -; CHECK: call void @llvm.memmove.p4.p3.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(3) align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 -define 
spir_kernel void @memmove_flat_to_flat_replace_src_with_group(ptr addrspace(4) %dest, ptr addrspace(3) %src.group.ptr, i64 %size) #0 { - %cast.src = addrspacecast ptr addrspace(3) %src.group.ptr to ptr addrspace(4) - call void @llvm.memmove.p4.p4.i64(ptr addrspace(4) align 4 %dest, ptr addrspace(4) align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !6 - ret void -} - -declare void @llvm.memset.p4.i64(ptr addrspace(4) nocapture writeonly, i8, i64, i1) #1 -declare void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1) #1 -declare void @llvm.memcpy.inline.p4.p4.i64(ptr addrspace(4) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1) #1 -declare void @llvm.memcpy.p4.p3.i32(ptr addrspace(4) nocapture writeonly, ptr addrspace(3) nocapture readonly, i32, i1) #1 -declare void @llvm.memmove.p4.p4.i64(ptr addrspace(4) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1) #1 - -attributes #0 = { nounwind } -attributes #1 = { argmemonly nounwind } - -!0 = !{!1, !1, i64 0} -!1 = !{!"A", !2} -!2 = !{!"tbaa root"} -!3 = !{!4} -!4 = distinct !{!4, !5, !"some scope 1"} -!5 = distinct !{!5, !"some domain"} -!6 = !{!7} -!7 = distinct !{!7, !5, !"some scope 2"} -!8 = !{i64 0, i64 8, null} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll deleted file mode 100644 index 83725d22df312..0000000000000 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/multiple-uses-of-val.ll +++ /dev/null @@ -1,70 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=spirv32-- -S -passes=infer-address-spaces --verify-each %s | FileCheck %s -; RUN: opt -mtriple=spirv64-- -S -passes=infer-address-spaces --verify-each %s | FileCheck %s - -; Inst can use a value multiple time. 
When we're inserting an addrspacecast to flat, -; it's important all the identical uses use an indentical replacement, especially -; for PHIs. - -define spir_kernel void @test_phi() { -; CHECK-LABEL: @test_phi( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOADED_PTR:%.*]] = load ptr addrspace(4), ptr addrspace(2) null, align 8 -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[LOADED_PTR]] to ptr addrspace(1) -; CHECK-NEXT: br label [[BB0:%.*]] -; CHECK: bb0: -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr addrspace(1) [[TMP0]], i64 3 -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[GEP]] to ptr addrspace(4) -; CHECK-NEXT: switch i32 0, label [[END:%.*]] [ -; CHECK-NEXT: i32 1, label [[END]] -; CHECK-NEXT: i32 4, label [[END]] -; CHECK-NEXT: i32 5, label [[BB1:%.*]] -; CHECK-NEXT: ] -; CHECK: bb1: -; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(1) [[GEP]], align 16 -; CHECK-NEXT: br label [[END]] -; CHECK: end: -; CHECK-NEXT: [[RETVAL_SROA_0_0_I569_PH:%.*]] = phi ptr addrspace(4) [ null, [[BB1]] ], [ [[TMP1]], [[BB0]] ], [ [[TMP1]], [[BB0]] ], [ [[TMP1]], [[BB0]] ] -; CHECK-NEXT: ret void -; -entry: - %loaded.ptr = load ptr addrspace(4), ptr addrspace(2) null, align 8 - br label %bb0 - -bb0: - %gep = getelementptr i64, ptr addrspace(4) %loaded.ptr, i64 3 - switch i32 0, label %end [ - i32 1, label %end - i32 4, label %end - i32 5, label %bb1 - ] - -bb1: - %0 = load double, ptr addrspace(4) %gep, align 16 - br label %end - -end: - %retval.sroa.0.0.i569.ph = phi ptr addrspace(4) [ null, %bb1 ], [ %gep, %bb0 ], [ %gep, %bb0 ], [ %gep, %bb0 ] - ret void -} - -declare void @uses_ptrs(ptr addrspace(4), ptr addrspace(4), ptr addrspace(4)) - -; We shouldn't treat PHIs differently, even other users should have the same treatment. -; All occurences of %gep are replaced with an identical value. 
-define spir_kernel void @test_other() { -; CHECK-LABEL: @test_other( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOADED_PTR:%.*]] = load ptr addrspace(4), ptr addrspace(2) null, align 8 -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[LOADED_PTR]] to ptr addrspace(1) -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr addrspace(4) -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr addrspace(4) [[TMP1]], i64 3 -; CHECK-NEXT: call void @uses_ptrs(ptr addrspace(4) [[GEP]], ptr addrspace(4) [[GEP]], ptr addrspace(4) [[GEP]]) -; CHECK-NEXT: ret void -; -entry: - %loaded.ptr = load ptr addrspace(4), ptr addrspace(2) null, align 8 - %gep = getelementptr i64, ptr addrspace(4) %loaded.ptr, i64 3 - call void @uses_ptrs(ptr addrspace(4) %gep, ptr addrspace(4) %gep, ptr addrspace(4) %gep) - ret void -} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll deleted file mode 100644 index b7c773e92cb2f..0000000000000 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/prefetch.ll +++ /dev/null @@ -1,60 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s -; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s - -define void @prefetch_shared_to_flat(ptr addrspace(3) %group.ptr) { -; CHECK-LABEL: define void @prefetch_shared_to_flat( -; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]]) { -; CHECK-NEXT: tail call void @llvm.prefetch.p3(ptr addrspace(3) [[GROUP_PTR]], i32 0, i32 0, i32 1) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) - tail call void @llvm.prefetch.p4(ptr addrspace(4) %cast, i32 0, i32 0, i32 1) - ret void -} - -define void @prefetch_global_to_flat(ptr addrspace(1) %global.ptr) { -; CHECK-LABEL: define void @prefetch_global_to_flat( -; CHECK-SAME: ptr 
addrspace(1) [[GLOBAL_PTR:%.*]]) { -; CHECK-NEXT: tail call void @llvm.prefetch.p1(ptr addrspace(1) [[GLOBAL_PTR]], i32 0, i32 0, i32 1) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) - tail call void @llvm.prefetch.p4(ptr addrspace(4) %cast, i32 0, i32 0, i32 1) - ret void -} - -define void @prefetch_constant_to_flat(ptr addrspace(2) %const.ptr) { -; CHECK-LABEL: define void @prefetch_constant_to_flat( -; CHECK-SAME: ptr addrspace(2) [[CONST_PTR:%.*]]) { -; CHECK-NEXT: tail call void @llvm.prefetch.p2(ptr addrspace(2) [[CONST_PTR]], i32 0, i32 0, i32 1) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(2) %const.ptr to ptr addrspace(4) - tail call void @llvm.prefetch.p4(ptr addrspace(4) %cast, i32 0, i32 0, i32 1) - ret void -} - -define void @prefetch_flat_to_shared(ptr addrspace(4) %flat.ptr) { -; CHECK-LABEL: define void @prefetch_flat_to_shared( -; CHECK-SAME: ptr addrspace(4) [[FLAT_PTR:%.*]]) { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(4) [[FLAT_PTR]] to ptr addrspace(3) -; CHECK-NEXT: tail call void @llvm.prefetch.p3(ptr addrspace(3) [[CAST]], i32 0, i32 0, i32 1) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(4) %flat.ptr to ptr addrspace(3) - tail call void @llvm.prefetch.p3(ptr addrspace(3) %cast, i32 0, i32 0, i32 1) - ret void -} - -define void @prefetch_flat_to_global(ptr addrspace(4) %flat.ptr) { -; CHECK-LABEL: define void @prefetch_flat_to_global( -; CHECK-SAME: ptr addrspace(4) [[FLAT_PTR:%.*]]) { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(4) [[FLAT_PTR]] to ptr addrspace(1) -; CHECK-NEXT: tail call void @llvm.prefetch.p1(ptr addrspace(1) [[CAST]], i32 0, i32 0, i32 1) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(4) %flat.ptr to ptr addrspace(1) - tail call void @llvm.prefetch.p1(ptr addrspace(1) %cast, i32 0, i32 0, i32 1) - ret void -} diff --git 
a/llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll deleted file mode 100644 index 296e3af86647e..0000000000000 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/preserving-debugloc-addrspacecast.ll +++ /dev/null @@ -1,48 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces -o - %s | FileCheck %s -; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces -o - %s | FileCheck %s - -; Check that InferAddressSpaces's cloneInstructionWithNewAddressSpace() propagates -; the debug location to new addrspacecast instruction which casts `%p` in the following test. - -@c0 = addrspace(2) global ptr poison - -define float @generic_ptr_from_constant() !dbg !5 { -; CHECK-LABEL: define float @generic_ptr_from_constant( -; CHECK-SAME: ) !dbg [[DBG5:![0-9]+]] { -; CHECK-NEXT: [[P:%.*]] = load ptr addrspace(4), ptr addrspace(2) @c0, align 8, !dbg [[DBG8:![0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(1), !dbg [[DBG8]] -; CHECK-NEXT: [[V:%.*]] = load float, ptr addrspace(1) [[TMP1]], align 4, !dbg [[DBG9:![0-9]+]] -; CHECK-NEXT: ret float [[V]], !dbg [[DBG10:![0-9]+]] -; - %p = load ptr addrspace(4), ptr addrspace(2) @c0, align 8, !dbg !8 - %v = load float, ptr addrspace(4) %p, align 4, !dbg !9 - ret float %v, !dbg !10 -} - -!llvm.dbg.cu = !{!0} -!llvm.debugify = !{!2, !3} -!llvm.module.flags = !{!4} - -; -!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) -!1 = !DIFile(filename: "temp.ll", directory: "/") -!2 = !{i32 3} -!3 = !{i32 0} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = distinct !DISubprogram(name: "generic_ptr_from_constant", linkageName: "generic_ptr_from_constant", scope: null, file: !1, line: 
1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) -!6 = !DISubroutineType(types: !7) -!7 = !{} -!8 = !DILocation(line: 1, column: 1, scope: !5) -!9 = !DILocation(line: 2, column: 1, scope: !5) -!10 = !DILocation(line: 3, column: 1, scope: !5) -;. -; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: [[META1:![0-9]+]], producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) -; CHECK: [[META1]] = !DIFile(filename: "temp.ll", directory: {{.*}}) -; CHECK: [[DBG5]] = distinct !DISubprogram(name: "generic_ptr_from_constant", linkageName: "generic_ptr_from_constant", scope: null, file: [[META1]], line: 1, type: [[META6:![0-9]+]], scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]]) -; CHECK: [[META6]] = !DISubroutineType(types: [[META7:![0-9]+]]) -; CHECK: [[META7]] = !{} -; CHECK: [[DBG8]] = !DILocation(line: 1, column: 1, scope: [[DBG5]]) -; CHECK: [[DBG9]] = !DILocation(line: 2, column: 1, scope: [[DBG5]]) -; CHECK: [[DBG10]] = !DILocation(line: 3, column: 1, scope: [[DBG5]]) -;. diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll deleted file mode 100644 index 3b5d4b7adc3a7..0000000000000 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/redundant-addrspacecast.ll +++ /dev/null @@ -1,28 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s -; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s - -%0 = type { i8, i8, i8 } - -; Make sure there is only one addrspacecast. The original cast should -; not be cloned to satisfy the second user. 
-define void @bar(ptr addrspace(1) %orig.ptr) { -; CHECK-LABEL: @bar( -; CHECK-NEXT: bb: -; CHECK-NEXT: [[ORIG_CAST:%.*]] = addrspacecast ptr addrspace(1) [[ORIG_PTR:%.*]] to ptr addrspace(4) -; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr addrspace(4) [[ORIG_CAST]], i64 0, i32 1 -; CHECK-NEXT: call void @foo(ptr addrspace(4) [[GEP0]]) -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[TMP0]], ptr addrspace(4) [[ORIG_CAST]], i64 0, i32 2 -; CHECK-NEXT: call void @foo(ptr addrspace(4) [[GEP1]]) -; CHECK-NEXT: ret void -; -bb: - %orig.cast = addrspacecast ptr addrspace(1) %orig.ptr to ptr addrspace(4) - %gep0 = getelementptr inbounds %0, ptr addrspace(4) %orig.cast, i64 0, i32 1 - call void @foo(ptr addrspace(4) %gep0) - %gep1 = getelementptr inbounds %0, ptr addrspace(4) %orig.cast, i64 0, i32 2 - call void @foo(ptr addrspace(4) %gep1) - ret void -} - -declare void @foo(ptr addrspace(4)) diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll deleted file mode 100644 index ec5c31f32d513..0000000000000 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/self-phi.ll +++ /dev/null @@ -1,29 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=spirv32-- -S -passes=infer-address-spaces %s | FileCheck %s -; RUN: opt -mtriple=spirv64-- -S -passes=infer-address-spaces %s | FileCheck %s - -define spir_kernel void @phi_self(ptr addrspace(1) %arg) { -; CHECK-LABEL: @phi_self( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi ptr addrspace(1) [ [[I]], [[LOOP]] ], [ [[ARG:%.*]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[I1:%.*]] = load i8, ptr addrspace(1) [[I]], align 1 -; CHECK-NEXT: [[I2:%.*]] = icmp eq i8 [[I1]], 0 -; CHECK-NEXT: br i1 [[I2]], label [[LOOP]], label [[RET:%.*]] -; CHECK: ret: -; CHECK-NEXT: ret void -; -entry: - %cast = addrspacecast ptr addrspace(1) %arg to ptr 
addrspace(4) - br label %loop - -loop: - %i = phi ptr addrspace(4) [%i, %loop], [%cast, %entry] - %i1 = load i8, ptr addrspace(4) %i, align 1 - %i2 = icmp eq i8 %i1, 0 - br i1 %i2, label %loop, label %ret - -ret: - ret void -} diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll deleted file mode 100644 index b835a008a91e0..0000000000000 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/volatile.ll +++ /dev/null @@ -1,187 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s -; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s - -; Check that volatile users of addrspacecast are not replaced. - -define spir_kernel void @volatile_load_flat_from_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 { -; CHECK-LABEL: define spir_kernel void @volatile_load_flat_from_global( -; CHECK-SAME: ptr addrspace(1) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(1) [[INPUT]] to ptr addrspace(4) -; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(4) [[TMP0]], align 4 -; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUTPUT]], align 4 -; CHECK-NEXT: ret void -; - %tmp0 = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) - %tmp1 = addrspacecast ptr addrspace(1) %output to ptr addrspace(4) - %val = load volatile i32, ptr addrspace(4) %tmp0, align 4 - store i32 %val, ptr addrspace(4) %tmp1, align 4 - ret void -} - -define spir_kernel void @volatile_load_flat_from_constant(ptr addrspace(2) nocapture %input, ptr addrspace(1) nocapture %output) #0 { -; CHECK-LABEL: define spir_kernel void @volatile_load_flat_from_constant( -; CHECK-SAME: ptr addrspace(2) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture 
[[OUTPUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(2) [[INPUT]] to ptr addrspace(4) -; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(4) [[TMP0]], align 4 -; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUTPUT]], align 4 -; CHECK-NEXT: ret void -; - %tmp0 = addrspacecast ptr addrspace(2) %input to ptr addrspace(4) - %tmp1 = addrspacecast ptr addrspace(1) %output to ptr addrspace(4) - %val = load volatile i32, ptr addrspace(4) %tmp0, align 4 - store i32 %val, ptr addrspace(4) %tmp1, align 4 - ret void -} - -define spir_kernel void @volatile_load_flat_from_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 { -; CHECK-LABEL: define spir_kernel void @volatile_load_flat_from_group( -; CHECK-SAME: ptr addrspace(3) nocapture [[INPUT:%.*]], ptr addrspace(3) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[INPUT]] to ptr addrspace(4) -; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(4) [[TMP0]], align 4 -; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(3) [[OUTPUT]], align 4 -; CHECK-NEXT: ret void -; - %tmp0 = addrspacecast ptr addrspace(3) %input to ptr addrspace(4) - %tmp1 = addrspacecast ptr addrspace(3) %output to ptr addrspace(4) - %val = load volatile i32, ptr addrspace(4) %tmp0, align 4 - store i32 %val, ptr addrspace(4) %tmp1, align 4 - ret void -} - -define spir_kernel void @volatile_load_flat_from_private(ptr nocapture %input, ptr nocapture %output) #0 { -; CHECK-LABEL: define spir_kernel void @volatile_load_flat_from_private( -; CHECK-SAME: ptr nocapture [[INPUT:%.*]], ptr nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(4) -; CHECK-NEXT: [[VAL:%.*]] = load volatile i32, ptr addrspace(4) [[TMP0]], align 4 -; CHECK-NEXT: store i32 [[VAL]], ptr [[OUTPUT]], align 4 -; CHECK-NEXT: ret void -; - %tmp0 = addrspacecast ptr %input to ptr addrspace(4) - %tmp1 = 
addrspacecast ptr %output to ptr addrspace(4) - %val = load volatile i32, ptr addrspace(4) %tmp0, align 4 - store i32 %val, ptr addrspace(4) %tmp1, align 4 - ret void -} - -define spir_kernel void @volatile_store_flat_to_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 { -; CHECK-LABEL: define spir_kernel void @volatile_store_flat_to_global( -; CHECK-SAME: ptr addrspace(1) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT]] to ptr addrspace(4) -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[INPUT]], align 4 -; CHECK-NEXT: store volatile i32 [[VAL]], ptr addrspace(4) [[TMP1]], align 4 -; CHECK-NEXT: ret void -; - %tmp0 = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) - %tmp1 = addrspacecast ptr addrspace(1) %output to ptr addrspace(4) - %val = load i32, ptr addrspace(4) %tmp0, align 4 - store volatile i32 %val, ptr addrspace(4) %tmp1, align 4 - ret void -} - -define spir_kernel void @volatile_store_flat_to_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 { -; CHECK-LABEL: define spir_kernel void @volatile_store_flat_to_group( -; CHECK-SAME: ptr addrspace(3) nocapture [[INPUT:%.*]], ptr addrspace(3) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(3) [[OUTPUT]] to ptr addrspace(4) -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(3) [[INPUT]], align 4 -; CHECK-NEXT: store volatile i32 [[VAL]], ptr addrspace(4) [[TMP1]], align 4 -; CHECK-NEXT: ret void -; - %tmp0 = addrspacecast ptr addrspace(3) %input to ptr addrspace(4) - %tmp1 = addrspacecast ptr addrspace(3) %output to ptr addrspace(4) - %val = load i32, ptr addrspace(4) %tmp0, align 4 - store volatile i32 %val, ptr addrspace(4) %tmp1, align 4 - ret void -} - -define spir_kernel void @volatile_store_flat_to_private(ptr nocapture %input, ptr nocapture %output) #0 { -; CHECK-LABEL: 
define spir_kernel void @volatile_store_flat_to_private( -; CHECK-SAME: ptr nocapture [[INPUT:%.*]], ptr nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(4) -; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[INPUT]], align 4 -; CHECK-NEXT: store volatile i32 [[VAL]], ptr addrspace(4) [[TMP1]], align 4 -; CHECK-NEXT: ret void -; - %tmp0 = addrspacecast ptr %input to ptr addrspace(4) - %tmp1 = addrspacecast ptr %output to ptr addrspace(4) - %val = load i32, ptr addrspace(4) %tmp0, align 4 - store volatile i32 %val, ptr addrspace(4) %tmp1, align 4 - ret void -} - -define i32 @volatile_atomicrmw_add_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 { -; CHECK-LABEL: define i32 @volatile_atomicrmw_add_group_to_flat( -; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[GROUP_PTR]] to ptr addrspace(4) -; CHECK-NEXT: [[RET:%.*]] = atomicrmw volatile add ptr addrspace(4) [[CAST]], i32 [[Y]] seq_cst, align 4 -; CHECK-NEXT: ret i32 [[RET]] -; - %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) - %ret = atomicrmw volatile add ptr addrspace(4) %cast, i32 %y seq_cst - ret i32 %ret -} - -define i32 @volatile_atomicrmw_add_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 { -; CHECK-LABEL: define i32 @volatile_atomicrmw_add_global_to_flat( -; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL_PTR]] to ptr addrspace(4) -; CHECK-NEXT: [[RET:%.*]] = atomicrmw volatile add ptr addrspace(4) [[CAST]], i32 [[Y]] seq_cst, align 4 -; CHECK-NEXT: ret i32 [[RET]] -; - %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) - %ret = atomicrmw volatile add ptr addrspace(4) %cast, i32 %y seq_cst - ret i32 %ret -} - -define { i32, i1 } @volatile_cmpxchg_global_to_flat(ptr addrspace(1) %global.ptr, i32 %cmp, i32 
%val) #0 { -; CHECK-LABEL: define { i32, i1 } @volatile_cmpxchg_global_to_flat( -; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL_PTR]] to ptr addrspace(4) -; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr addrspace(4) [[CAST]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 -; CHECK-NEXT: ret { i32, i1 } [[RET]] -; - %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) - %ret = cmpxchg volatile ptr addrspace(4) %cast, i32 %cmp, i32 %val seq_cst monotonic - ret { i32, i1 } %ret -} - -define { i32, i1 } @volatile_cmpxchg_group_to_flat(ptr addrspace(3) %group.ptr, i32 %cmp, i32 %val) #0 { -; CHECK-LABEL: define { i32, i1 } @volatile_cmpxchg_group_to_flat( -; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[GROUP_PTR]] to ptr addrspace(4) -; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr addrspace(4) [[CAST]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 -; CHECK-NEXT: ret { i32, i1 } [[RET]] -; - %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) - %ret = cmpxchg volatile ptr addrspace(4) %cast, i32 %cmp, i32 %val seq_cst monotonic - ret { i32, i1 } %ret -} - -define spir_kernel void @volatile_memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 { -; CHECK-LABEL: define spir_kernel void @volatile_memset_group_to_flat( -; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[GROUP_PTR]] to ptr addrspace(4) -; CHECK-NEXT: call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 [[CAST]], i8 4, i64 32, i1 true) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) - call void @llvm.memset.p0.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 32, i1 true) - ret void -} - 
-define spir_kernel void @volatile_memset_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 { -; CHECK-LABEL: define spir_kernel void @volatile_memset_global_to_flat( -; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL_PTR]] to ptr addrspace(4) -; CHECK-NEXT: call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 [[CAST]], i8 4, i64 32, i1 true) -; CHECK-NEXT: ret void -; - %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) - call void @llvm.memset.p4.i64(ptr addrspace(4) align 4 %cast, i8 4, i64 32, i1 true) - ret void -} - -declare void @llvm.memset.p4.i64(ptr addrspace(4) nocapture writeonly, i8, i64, i1) #1 - -attributes #0 = { nounwind } -attributes #1 = { argmemonly nounwind } From a01e1bc58fe6113d869d7d2d9889b1e8a1b26cfb Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sat, 12 Oct 2024 03:51:48 +0300 Subject: [PATCH 04/14] Fix formatting. --- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 3caf000f17117..ea80c86e00b6c 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -148,8 +148,8 @@ SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const { match( const_cast(V), m_c_And(m_Not(m_Intrinsic(m_Value(Ptr))), - m_Not(m_Intrinsic( - m_Deferred(Ptr)))))) + m_Not(m_Intrinsic( + m_Deferred(Ptr)))))) return std::pair(Ptr, AddressSpace::CrossWorkgroup); return std::pair(nullptr, UINT32_MAX); From ab1fb667c81a32ca01a29b92095d21c464686615 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sat, 12 Oct 2024 14:29:24 +0300 Subject: [PATCH 05/14] Fix inclusion ordering. 
--- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index ea80c86e00b6c..874afdbaac7b6 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -25,9 +25,9 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" -#include "llvm/InitializePasses.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/InitializePasses.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" #include "llvm/Passes/OptimizationLevel.h" From 168149a08c159ed4f7db7144f8ce8dc65219f77f Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sat, 12 Oct 2024 19:00:09 +0300 Subject: [PATCH 06/14] Only enable "fancy" stuff fof amdgcnspirv for now. --- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 12 ++++++++++++ .../InferAddressSpaces/SPIRV/assumed-addrspace.ll | 3 +-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 874afdbaac7b6..1b0a1bd943357 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -108,6 +108,11 @@ enum AddressSpace { }; unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { + // TODO: we only enable this for AMDGCN flavoured SPIR-V, where we know it to + // be correct; this might be relaxed in the future. 
+ if (getTargetTriple().getVendor() != Triple::VendorType::AMD) + return UINT32_MAX; + const auto *LD = dyn_cast(V); if (!LD) return UINT32_MAX; @@ -127,6 +132,13 @@ unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { std::pair SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const { + // TODO: this is only enabled for AMDGCN flavoured SPIR-V at the moment, where + // we can rely on the intrinsics being available; we should re-implement it on + // top of SPIR-V specific intrinsics if/when they are added or + // OpGenericCastToPtrExplicit / OpGenericPtrMemSemantics directly. + if (getTargetTriple().getVendor() != Triple::VendorType::AMD) + return std::pair(nullptr, UINT32_MAX); + using namespace PatternMatch; if (auto *II = dyn_cast(V)) { diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll index 9b65ff44f288f..e9a4eb5cc61ce 100644 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/assumed-addrspace.ll @@ -1,5 +1,4 @@ -; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces -o - %s | FileCheck %s -; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces -o - %s | FileCheck %s +; RUN: opt -S -mtriple=spirv64-amd-amdhsa -passes=infer-address-spaces -o - %s | FileCheck %s @c0 = addrspace(2) global ptr undef From a7d1467d81aa9eacd5d22c3218f305a317a35a0e Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Tue, 22 Oct 2024 15:06:35 +0100 Subject: [PATCH 07/14] Remove spurious target check, clarify comment. 
--- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 1b0a1bd943357..7b2324de99b9c 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -132,13 +132,9 @@ unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { std::pair SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const { - // TODO: this is only enabled for AMDGCN flavoured SPIR-V at the moment, where - // we can rely on the intrinsics being available; we should re-implement it on - // top of SPIR-V specific intrinsics if/when they are added or - // OpGenericCastToPtrExplicit / OpGenericPtrMemSemantics directly. - if (getTargetTriple().getVendor() != Triple::VendorType::AMD) - return std::pair(nullptr, UINT32_MAX); - + // TODO: this is will only fire for AMDGCN flavoured SPIR-V at the moment, + // where the intrinsics are available; we should re-implement the predicates + // on top of SPIR-V specific intrinsics OpGenericPtrMemSemantics directly. using namespace PatternMatch; if (auto *II = dyn_cast(V)) { From a7073634a454918ab94c6d2c846c48f1b949a044 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Thu, 28 Nov 2024 02:39:37 +0000 Subject: [PATCH 08/14] Implement feedback. 
--- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 33 -------------------- llvm/lib/Target/SPIRV/SPIRVTargetMachine.h | 2 -- 2 files changed, 35 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 882929b89a50a..7fa472464525d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -131,39 +131,6 @@ unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { return AddressSpace::CrossWorkgroup; } -std::pair -SPIRVTargetMachine::getPredicatedAddrSpace(const Value *V) const { - // TODO: this is will only fire for AMDGCN flavoured SPIR-V at the moment, - // where the intrinsics are available; we should re-implement the predicates - // on top of SPIR-V specific intrinsics OpGenericPtrMemSemantics directly. - using namespace PatternMatch; - - if (auto *II = dyn_cast(V)) { - switch (II->getIntrinsicID()) { - case Intrinsic::amdgcn_is_shared: - return std::pair(II->getArgOperand(0), AddressSpace::Workgroup); - case Intrinsic::amdgcn_is_private: - return std::pair(II->getArgOperand(0), AddressSpace::Function); - default: - break; - } - return std::pair(nullptr, UINT32_MAX); - } - // Check the global pointer predication based on - // (!is_share(p) && !is_private(p)). Note that logic 'and' is commutative and - // the order of 'is_shared' and 'is_private' is not significant. 
- Value *Ptr; - if (getTargetTriple().getVendor() == Triple::VendorType::AMD && - match( - const_cast(V), - m_c_And(m_Not(m_Intrinsic(m_Value(Ptr))), - m_Not(m_Intrinsic( - m_Deferred(Ptr)))))) - return std::pair(Ptr, AddressSpace::CrossWorkgroup); - - return std::pair(nullptr, UINT32_MAX); -} - bool SPIRVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { if (SrcAS != AddressSpace::Generic && SrcAS != AddressSpace::CrossWorkgroup) diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h index c0eae45845339..6754af67f51bf 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.h @@ -45,8 +45,6 @@ class SPIRVTargetMachine : public CodeGenTargetMachineImpl { } unsigned getAssumedAddrSpace(const Value *V) const override; - std::pair - getPredicatedAddrSpace(const Value *V) const override; bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) const override; void registerPassBuilderCallbacks(PassBuilder &PB) override; From fe923f218a69ce3f1dbe0a2b3d1f1432d9a1b680 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Thu, 28 Nov 2024 18:04:50 +0000 Subject: [PATCH 09/14] Update llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp Co-authored-by: Victor Lomuller --- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 7fa472464525d..bbe94e28dea3f 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -125,7 +125,7 @@ unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { const auto *Ptr = LD->getPointerOperand(); if (Ptr->getType()->getPointerAddressSpace() != AddressSpace::UniformConstant) return UINT32_MAX; - // For a loaded from a pointer to UniformConstant, we can infer CrossWorkgroup + // For a load from a pointer to UniformConstant, we 
can infer CrossWorkgroup // storage, as this could only have been legally initialised with a // CrossWorkgroup (aka device) constant pointer. return AddressSpace::CrossWorkgroup; From c7e34e776d075467ea2294f8d03c65d4b0c9e61c Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Wed, 4 Dec 2024 19:48:40 +0000 Subject: [PATCH 10/14] Guard AMDGCN specific predicate implementation. --- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index bbe94e28dea3f..a4fdc1791116c 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -133,6 +133,8 @@ unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { bool SPIRVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { + if (getTargetTriple().getVendor() != Triple::VendorType::AMD) + return false; if (SrcAS != AddressSpace::Generic && SrcAS != AddressSpace::CrossWorkgroup) return false; return DestAS == AddressSpace::Generic || From ac82484ebd52b08779c5c32b82417fbe236f2bf2 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sun, 2 Mar 2025 13:14:03 +0000 Subject: [PATCH 11/14] Update test. 
--- .../amdgpu-kernel-arg-pointer-type.cu | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu index 60d2064bffbe4..c13123ab11356 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -58,7 +58,7 @@ // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel1Pi( -// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] !max_work_group_size [[META5:![0-9]+]] { +// OPT-SPIRV-SAME: ptr addrspace(1) noundef captures(none) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] !max_work_group_size [[META5:![0-9]+]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 @@ -124,7 +124,7 @@ __global__ void kernel1(int *x) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel2Ri( -// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { +// OPT-SPIRV-SAME: ptr addrspace(1) noundef align 4 captures(none) dereferenceable(4) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], align 4 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 @@ -191,7 +191,7 @@ __global__ void kernel2(int &x) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel3PU3AS2iPU3AS1i( -// OPT-SPIRV-SAME: ptr addrspace(2) noundef readonly captures(none) [[X:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr 
addrspace(4) #[[ATTR1:[0-9]+]] !max_work_group_size [[META5]] { +// OPT-SPIRV-SAME: ptr addrspace(2) noundef readonly captures(none) [[X:%.*]], ptr addrspace(1) noundef writeonly captures(none) initializes((0, 4)) [[Y:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(2) [[X]], align 4 // OPT-SPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[Y]], align 4 @@ -257,7 +257,7 @@ __global__ void kernel3(__attribute__((address_space(2))) int *x, // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_func void @_Z4funcPi( -// OPT-SPIRV-SAME: ptr addrspace(4) noundef captures(none) [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] { +// OPT-SPIRV-SAME: ptr addrspace(4) noundef captures(none) [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[X]], align 4 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 @@ -339,7 +339,7 @@ struct S { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S( -// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { +// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] !max_work_group_size [[META5]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1 @@ -442,13 +442,13 @@ __global__ void kernel4(struct S s) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel5P1S( -// OPT-SPIRV-SAME: ptr addrspace(1) noundef [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { +// OPT-SPIRV-SAME: ptr addrspace(1) noundef readonly captures(none) [[S_COERCE:%.*]]) 
local_unnamed_addr addrspace(4) #[[ATTR2]] !max_work_group_size [[META5]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[S_COERCE]], align 8 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 // OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4 -// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[S_COERCE]], i64 8 +// OPT-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[S_COERCE]], i64 8 // OPT-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(1) [[Y]], align 8 // OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4 // OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 @@ -545,7 +545,7 @@ struct T { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T( -// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { +// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] !max_work_group_size [[META5]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0 // OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0 @@ -625,7 +625,7 @@ __global__ void kernel6(struct T t) { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel7Pi( -// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { +// OPT-SPIRV-SAME: ptr addrspace(1) noalias noundef captures(none) [[X_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(1) [[X_COERCE]], 
align 4 // OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 @@ -692,7 +692,7 @@ struct SS { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS( -// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { +// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2]] !max_work_group_size [[META5]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] // OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0 // OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4 From 8657436730c8ee0b6cb093f56ef0802d2e6fdfc9 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sun, 2 Mar 2025 13:15:20 +0000 Subject: [PATCH 12/14] Update test. --- clang/test/Headers/__clang_hip_math.hip | 26 ++++++++----------------- 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index d448ab134ca4d..3e49deba368cf 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -1075,7 +1075,6 @@ extern "C" __device__ double test_cospi(double x) { return cospi(x); } -// // DEFAULT-LABEL: @test_cyl_bessel_i0f( // DEFAULT-NEXT: entry: // DEFAULT-NEXT: [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR14]] @@ -1748,7 +1747,6 @@ extern "C" __device__ double test_fmax(double x, double y) { return fmax(x, y); } -// // DEFAULT-LABEL: @test_fminf( // DEFAULT-NEXT: entry: // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract noundef float @llvm.minnum.f32(float [[X:%.*]], float [[Y:%.*]]) @@ -3086,10 +3084,9 @@ extern "C" __device__ long int test_lround(double x) { // AMDGCNSPIRV-LABEL: @test_modff( // AMDGCNSPIRV-NEXT: entry: // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr 
addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15:[0-9]+]] // AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_modf_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17:![0-9]+]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr [[__TMP_I]], align 4, !tbaa [[TBAA17:![0-9]+]] // AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] @@ -3131,10 +3128,9 @@ extern "C" __device__ float test_modff(float x, float* y) { // AMDGCNSPIRV-LABEL: @test_modf( // AMDGCNSPIRV-NEXT: entry: // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_modf_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19:![0-9]+]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr [[__TMP_I]], align 8, !tbaa [[TBAA19:![0-9]+]] // AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] @@ -4471,10 +4467,9 @@ extern "C" __device__ double test_remainder(double x, double y) { // AMDGCNSPIRV-LABEL: @test_remquof( // AMDGCNSPIRV-NEXT: entry: 
// AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA13]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[__TMP_I]], align 4, !tbaa [[TBAA13]] // AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA13]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret float [[CALL_I]] @@ -4516,10 +4511,9 @@ extern "C" __device__ float test_remquof(float x, float y, int* z) { // AMDGCNSPIRV-LABEL: @test_remquo( // AMDGCNSPIRV-NEXT: entry: // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca i32, align 4 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func noundef addrspace(4) double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA13]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load i32, ptr [[__TMP_I]], align 4, !tbaa [[TBAA13]] // AMDGCNSPIRV-NEXT: store i32 [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA13]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret double [[CALL_I]] @@ 
-5230,11 +5224,10 @@ extern "C" __device__ BOOL_TYPE test___signbit(double x) { // AMDGCNSPIRV-LABEL: @test_sincosf( // AMDGCNSPIRV-NEXT: entry: // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr [[__TMP_I]], align 4, !tbaa [[TBAA17]] // AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret void @@ -5279,11 +5272,10 @@ extern "C" __device__ void test_sincosf(float x, float *y, float *z) { // AMDGCNSPIRV-LABEL: @test_sincos( // AMDGCNSPIRV-NEXT: entry: // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr 
[[__TMP_I]], align 8, !tbaa [[TBAA19]] // AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 8, !tbaa [[TBAA19]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret void @@ -5328,11 +5320,10 @@ extern "C" __device__ void test_sincos(double x, double *y, double *z) { // AMDGCNSPIRV-LABEL: @test_sincospif( // AMDGCNSPIRV-NEXT: entry: // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca float, align 4 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr noundef nonnull [[__TMP_I]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: store float [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 4, !tbaa [[TBAA17]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(4) [[__TMP_ASCAST_I]], align 4, !tbaa [[TBAA17]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load float, ptr [[__TMP_I]], align 4, !tbaa [[TBAA17]] // AMDGCNSPIRV-NEXT: store float [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 4, !tbaa [[TBAA17]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret void @@ -5377,11 +5368,10 @@ extern "C" __device__ void test_sincospif(float x, float *y, float *z) { // AMDGCNSPIRV-LABEL: @test_sincospi( // AMDGCNSPIRV-NEXT: entry: // AMDGCNSPIRV-NEXT: [[__TMP_I:%.*]] = alloca double, align 8 -// AMDGCNSPIRV-NEXT: [[__TMP_ASCAST_I:%.*]] = addrspacecast ptr [[__TMP_I]] to ptr addrspace(4) // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: [[CALL_I:%.*]] = call contract spir_func addrspace(4) double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr noundef 
nonnull [[__TMP_I]]) #[[ATTR14]] // AMDGCNSPIRV-NEXT: store double [[CALL_I]], ptr addrspace(4) [[Y:%.*]], align 8, !tbaa [[TBAA19]] -// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr addrspace(4) [[__TMP_ASCAST_I]], align 8, !tbaa [[TBAA19]] +// AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = load double, ptr [[__TMP_I]], align 8, !tbaa [[TBAA19]] // AMDGCNSPIRV-NEXT: store double [[TMP0]], ptr addrspace(4) [[Z:%.*]], align 8, !tbaa [[TBAA19]] // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[__TMP_I]]) #[[ATTR15]] // AMDGCNSPIRV-NEXT: ret void From ce1922a5a20d9db60bc931e4a200dfd909150404 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Sun, 2 Mar 2025 14:18:46 +0000 Subject: [PATCH 13/14] Update test. --- .../InferAddressSpaces/SPIRV/basic.ll | 245 +++++++++++++++++- 1 file changed, 236 insertions(+), 9 deletions(-) diff --git a/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll b/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll index 75b23aa30349a..e2652623d02fe 100644 --- a/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll +++ b/llvm/test/Transforms/InferAddressSpaces/SPIRV/basic.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck %s -; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck %s +; RUN: opt -S -mtriple=spirv32-- -passes=infer-address-spaces %s | FileCheck --check-prefix=SPV32 %s +; RUN: opt -S -mtriple=spirv64-- -passes=infer-address-spaces %s | FileCheck --check-prefix=SPV64 %s ; Trivial optimization of generic addressing @@ -10,6 +10,18 @@ define float @load_global_from_flat(ptr addrspace(4) %generic_scalar) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1) ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[TMP0]], align 4 ; CHECK-NEXT: ret float [[TMP1]] +; +; SPV32-LABEL: define 
float @load_global_from_flat( +; SPV32-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0:[0-9]+]] { +; SPV32-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1) +; SPV32-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[TMP0]], align 4 +; SPV32-NEXT: ret float [[TMP1]] +; +; SPV64-LABEL: define float @load_global_from_flat( +; SPV64-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0:[0-9]+]] { +; SPV64-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1) +; SPV64-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(1) [[TMP0]], align 4 +; SPV64-NEXT: ret float [[TMP1]] ; %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(1) %tmp1 = load float, ptr addrspace(1) %tmp0 @@ -22,6 +34,18 @@ define float @load_group_from_flat(ptr addrspace(4) %generic_scalar) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3) ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(3) [[TMP0]], align 4 ; CHECK-NEXT: ret float [[TMP1]] +; +; SPV32-LABEL: define float @load_group_from_flat( +; SPV32-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3) +; SPV32-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(3) [[TMP0]], align 4 +; SPV32-NEXT: ret float [[TMP1]] +; +; SPV64-LABEL: define float @load_group_from_flat( +; SPV64-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3) +; SPV64-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(3) [[TMP0]], align 4 +; SPV64-NEXT: ret float [[TMP1]] ; %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(3) %tmp1 = load float, ptr addrspace(3) %tmp0 @@ -34,6 +58,18 @@ define float @load_private_from_flat(ptr addrspace(4) %generic_scalar) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = 
addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4 ; CHECK-NEXT: ret float [[TMP1]] +; +; SPV32-LABEL: define float @load_private_from_flat( +; SPV32-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr +; SPV32-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4 +; SPV32-NEXT: ret float [[TMP1]] +; +; SPV64-LABEL: define float @load_private_from_flat( +; SPV64-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr +; SPV64-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4 +; SPV64-NEXT: ret float [[TMP1]] ; %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr %tmp1 = load float, ptr %tmp0 @@ -46,6 +82,18 @@ define spir_kernel void @store_global_from_flat(ptr addrspace(4) %generic_scalar ; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1) ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) [[TMP0]], align 4 ; CHECK-NEXT: ret void +; +; SPV32-LABEL: define spir_kernel void @store_global_from_flat( +; SPV32-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1) +; SPV32-NEXT: store float 0.000000e+00, ptr addrspace(1) [[TMP0]], align 4 +; SPV32-NEXT: ret void +; +; SPV64-LABEL: define spir_kernel void @store_global_from_flat( +; SPV64-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(1) +; SPV64-NEXT: store float 0.000000e+00, ptr addrspace(1) [[TMP0]], align 4 +; SPV64-NEXT: ret void ; %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(1) store float 0.0, ptr addrspace(1) %tmp0 @@ -58,6 +106,18 @@ define spir_kernel 
void @store_group_from_flat(ptr addrspace(4) %generic_scalar) ; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3) ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(3) [[TMP0]], align 4 ; CHECK-NEXT: ret void +; +; SPV32-LABEL: define spir_kernel void @store_group_from_flat( +; SPV32-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3) +; SPV32-NEXT: store float 0.000000e+00, ptr addrspace(3) [[TMP0]], align 4 +; SPV32-NEXT: ret void +; +; SPV64-LABEL: define spir_kernel void @store_group_from_flat( +; SPV64-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr addrspace(3) +; SPV64-NEXT: store float 0.000000e+00, ptr addrspace(3) [[TMP0]], align 4 +; SPV64-NEXT: ret void ; %tmp0 = addrspacecast ptr addrspace(4) %generic_scalar to ptr addrspace(3) store float 0.0, ptr addrspace(3) %tmp0 @@ -70,6 +130,18 @@ define spir_kernel void @store_private_from_flat(ptr addrspace(4) %generic_scala ; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr ; CHECK-NEXT: store float 0.000000e+00, ptr [[TMP0]], align 4 ; CHECK-NEXT: ret void +; +; SPV32-LABEL: define spir_kernel void @store_private_from_flat( +; SPV32-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr +; SPV32-NEXT: store float 0.000000e+00, ptr [[TMP0]], align 4 +; SPV32-NEXT: ret void +; +; SPV64-LABEL: define spir_kernel void @store_private_from_flat( +; SPV64-SAME: ptr addrspace(4) [[GENERIC_SCALAR:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[GENERIC_SCALAR]] to ptr +; SPV64-NEXT: store float 0.000000e+00, ptr [[TMP0]], align 4 +; SPV64-NEXT: ret void ; %tmp0 = addrspacecast ptr addrspace(4) 
%generic_scalar to ptr store float 0.0, ptr %tmp0 @@ -78,10 +150,22 @@ define spir_kernel void @store_private_from_flat(ptr addrspace(4) %generic_scala define spir_kernel void @load_store_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 { ; CHECK-LABEL: define spir_kernel void @load_store_global( -; CHECK-SAME: ptr addrspace(1) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) captures(none) [[INPUT:%.*]], ptr addrspace(1) captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[INPUT]], align 4 ; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUTPUT]], align 4 ; CHECK-NEXT: ret void +; +; SPV32-LABEL: define spir_kernel void @load_store_global( +; SPV32-SAME: ptr addrspace(1) captures(none) [[INPUT:%.*]], ptr addrspace(1) captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[INPUT]], align 4 +; SPV32-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUTPUT]], align 4 +; SPV32-NEXT: ret void +; +; SPV64-LABEL: define spir_kernel void @load_store_global( +; SPV64-SAME: ptr addrspace(1) captures(none) [[INPUT:%.*]], ptr addrspace(1) captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(1) [[INPUT]], align 4 +; SPV64-NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUTPUT]], align 4 +; SPV64-NEXT: ret void ; %tmp0 = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) %tmp1 = addrspacecast ptr addrspace(1) %output to ptr addrspace(4) @@ -92,10 +176,22 @@ define spir_kernel void @load_store_global(ptr addrspace(1) nocapture %input, pt define spir_kernel void @load_store_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 { ; CHECK-LABEL: define spir_kernel void @load_store_group( -; CHECK-SAME: ptr addrspace(3) nocapture [[INPUT:%.*]], ptr addrspace(3) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr 
addrspace(3) captures(none) [[INPUT:%.*]], ptr addrspace(3) captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(3) [[INPUT]], align 4 ; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(3) [[OUTPUT]], align 4 ; CHECK-NEXT: ret void +; +; SPV32-LABEL: define spir_kernel void @load_store_group( +; SPV32-SAME: ptr addrspace(3) captures(none) [[INPUT:%.*]], ptr addrspace(3) captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(3) [[INPUT]], align 4 +; SPV32-NEXT: store i32 [[VAL]], ptr addrspace(3) [[OUTPUT]], align 4 +; SPV32-NEXT: ret void +; +; SPV64-LABEL: define spir_kernel void @load_store_group( +; SPV64-SAME: ptr addrspace(3) captures(none) [[INPUT:%.*]], ptr addrspace(3) captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(3) [[INPUT]], align 4 +; SPV64-NEXT: store i32 [[VAL]], ptr addrspace(3) [[OUTPUT]], align 4 +; SPV64-NEXT: ret void ; %tmp0 = addrspacecast ptr addrspace(3) %input to ptr addrspace(4) %tmp1 = addrspacecast ptr addrspace(3) %output to ptr addrspace(4) @@ -106,10 +202,22 @@ define spir_kernel void @load_store_group(ptr addrspace(3) nocapture %input, ptr define spir_kernel void @load_store_private(ptr nocapture %input, ptr nocapture %output) #0 { ; CHECK-LABEL: define spir_kernel void @load_store_private( -; CHECK-SAME: ptr nocapture [[INPUT:%.*]], ptr nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr captures(none) [[INPUT:%.*]], ptr captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[INPUT]], align 4 ; CHECK-NEXT: store i32 [[VAL]], ptr [[OUTPUT]], align 4 ; CHECK-NEXT: ret void +; +; SPV32-LABEL: define spir_kernel void @load_store_private( +; SPV32-SAME: ptr captures(none) [[INPUT:%.*]], ptr captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[VAL:%.*]] = load i32, ptr [[INPUT]], align 4 +; SPV32-NEXT: store i32 [[VAL]], ptr [[OUTPUT]], align 4 +; SPV32-NEXT: ret 
void +; +; SPV64-LABEL: define spir_kernel void @load_store_private( +; SPV64-SAME: ptr captures(none) [[INPUT:%.*]], ptr captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[VAL:%.*]] = load i32, ptr [[INPUT]], align 4 +; SPV64-NEXT: store i32 [[VAL]], ptr [[OUTPUT]], align 4 +; SPV64-NEXT: ret void ; %tmp0 = addrspacecast ptr %input to ptr addrspace(4) %tmp1 = addrspacecast ptr %output to ptr addrspace(4) @@ -120,10 +228,22 @@ define spir_kernel void @load_store_private(ptr nocapture %input, ptr nocapture define spir_kernel void @load_store_flat(ptr addrspace(4) nocapture %input, ptr addrspace(4) nocapture %output) #0 { ; CHECK-LABEL: define spir_kernel void @load_store_flat( -; CHECK-SAME: ptr addrspace(4) nocapture [[INPUT:%.*]], ptr addrspace(4) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(4) captures(none) [[INPUT:%.*]], ptr addrspace(4) captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) [[INPUT]], align 4 ; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(4) [[OUTPUT]], align 4 ; CHECK-NEXT: ret void +; +; SPV32-LABEL: define spir_kernel void @load_store_flat( +; SPV32-SAME: ptr addrspace(4) captures(none) [[INPUT:%.*]], ptr addrspace(4) captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) [[INPUT]], align 4 +; SPV32-NEXT: store i32 [[VAL]], ptr addrspace(4) [[OUTPUT]], align 4 +; SPV32-NEXT: ret void +; +; SPV64-LABEL: define spir_kernel void @load_store_flat( +; SPV64-SAME: ptr addrspace(4) captures(none) [[INPUT:%.*]], ptr addrspace(4) captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) [[INPUT]], align 4 +; SPV64-NEXT: store i32 [[VAL]], ptr addrspace(4) [[OUTPUT]], align 4 +; SPV64-NEXT: ret void ; %val = load i32, ptr addrspace(4) %input, align 4 store i32 %val, ptr addrspace(4) %output, align 4 @@ -132,10 +252,22 @@ define spir_kernel void @load_store_flat(ptr addrspace(4) nocapture 
%input, ptr define spir_kernel void @store_addrspacecast_ptr_value(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 { ; CHECK-LABEL: define spir_kernel void @store_addrspacecast_ptr_value( -; CHECK-SAME: ptr addrspace(1) nocapture [[INPUT:%.*]], ptr addrspace(1) nocapture [[OUTPUT:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) captures(none) [[INPUT:%.*]], ptr addrspace(1) captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[INPUT]] to ptr addrspace(4) ; CHECK-NEXT: store ptr addrspace(4) [[CAST]], ptr addrspace(1) [[OUTPUT]], align 4 ; CHECK-NEXT: ret void +; +; SPV32-LABEL: define spir_kernel void @store_addrspacecast_ptr_value( +; SPV32-SAME: ptr addrspace(1) captures(none) [[INPUT:%.*]], ptr addrspace(1) captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[INPUT]] to ptr addrspace(4) +; SPV32-NEXT: store ptr addrspace(4) [[CAST]], ptr addrspace(1) [[OUTPUT]], align 4 +; SPV32-NEXT: ret void +; +; SPV64-LABEL: define spir_kernel void @store_addrspacecast_ptr_value( +; SPV64-SAME: ptr addrspace(1) captures(none) [[INPUT:%.*]], ptr addrspace(1) captures(none) [[OUTPUT:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(1) [[INPUT]] to ptr addrspace(4) +; SPV64-NEXT: store ptr addrspace(4) [[CAST]], ptr addrspace(1) [[OUTPUT]], align 4 +; SPV64-NEXT: ret void ; %cast = addrspacecast ptr addrspace(1) %input to ptr addrspace(4) store ptr addrspace(4) %cast, ptr addrspace(1) %output, align 4 @@ -147,6 +279,16 @@ define i32 @atomicrmw_add_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) # ; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(1) [[GLOBAL_PTR]], i32 [[Y]] seq_cst, align 4 ; CHECK-NEXT: ret i32 [[RET]] +; +; SPV32-LABEL: define i32 @atomicrmw_add_global_to_flat( +; SPV32-SAME: ptr addrspace(1) 
[[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(1) [[GLOBAL_PTR]], i32 [[Y]] seq_cst, align 4 +; SPV32-NEXT: ret i32 [[RET]] +; +; SPV64-LABEL: define i32 @atomicrmw_add_global_to_flat( +; SPV64-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(1) [[GLOBAL_PTR]], i32 [[Y]] seq_cst, align 4 +; SPV64-NEXT: ret i32 [[RET]] ; %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) %ret = atomicrmw add ptr addrspace(4) %cast, i32 %y seq_cst @@ -158,6 +300,16 @@ define i32 @atomicrmw_add_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 ; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(3) [[GROUP_PTR]], i32 [[Y]] seq_cst, align 4 ; CHECK-NEXT: ret i32 [[RET]] +; +; SPV32-LABEL: define i32 @atomicrmw_add_group_to_flat( +; SPV32-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(3) [[GROUP_PTR]], i32 [[Y]] seq_cst, align 4 +; SPV32-NEXT: ret i32 [[RET]] +; +; SPV64-LABEL: define i32 @atomicrmw_add_group_to_flat( +; SPV64-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(3) [[GROUP_PTR]], i32 [[Y]] seq_cst, align 4 +; SPV64-NEXT: ret i32 [[RET]] ; %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) %ret = atomicrmw add ptr addrspace(4) %cast, i32 %y seq_cst @@ -169,6 +321,16 @@ define { i32, i1 } @cmpxchg_global_to_flat(ptr addrspace(1) %global.ptr, i32 %cm ; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr addrspace(1) [[GLOBAL_PTR]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 ; CHECK-NEXT: ret { i32, i1 } [[RET]] +; +; SPV32-LABEL: define { i32, i1 } @cmpxchg_global_to_flat( 
+; SPV32-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[RET:%.*]] = cmpxchg ptr addrspace(1) [[GLOBAL_PTR]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 +; SPV32-NEXT: ret { i32, i1 } [[RET]] +; +; SPV64-LABEL: define { i32, i1 } @cmpxchg_global_to_flat( +; SPV64-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[RET:%.*]] = cmpxchg ptr addrspace(1) [[GLOBAL_PTR]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 +; SPV64-NEXT: ret { i32, i1 } [[RET]] ; %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) %ret = cmpxchg ptr addrspace(4) %cast, i32 %cmp, i32 %val seq_cst monotonic @@ -180,6 +342,16 @@ define { i32, i1 } @cmpxchg_group_to_flat(ptr addrspace(3) %group.ptr, i32 %cmp, ; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr addrspace(3) [[GROUP_PTR]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 ; CHECK-NEXT: ret { i32, i1 } [[RET]] +; +; SPV32-LABEL: define { i32, i1 } @cmpxchg_group_to_flat( +; SPV32-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[RET:%.*]] = cmpxchg ptr addrspace(3) [[GROUP_PTR]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 +; SPV32-NEXT: ret { i32, i1 } [[RET]] +; +; SPV64-LABEL: define { i32, i1 } @cmpxchg_group_to_flat( +; SPV64-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]], i32 [[CMP:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[RET:%.*]] = cmpxchg ptr addrspace(3) [[GROUP_PTR]], i32 [[CMP]], i32 [[VAL]] seq_cst monotonic, align 4 +; SPV64-NEXT: ret { i32, i1 } [[RET]] ; %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr addrspace(4) %ret = cmpxchg ptr addrspace(4) %cast, i32 %cmp, i32 %val seq_cst monotonic @@ -187,6 +359,24 @@ define { i32, i1 } @cmpxchg_group_to_flat(ptr addrspace(3) %group.ptr, i32 %cmp, } define { ptr 
addrspace(4), i1 } @cmpxchg_group_to_flat_wrong_operand(ptr addrspace(3) %cas.ptr, ptr addrspace(3) %cmp.ptr, ptr addrspace(4) %val) #0 { +; CHECK-LABEL: define { ptr addrspace(4), i1 } @cmpxchg_group_to_flat_wrong_operand( +; CHECK-SAME: ptr addrspace(3) [[CAS_PTR:%.*]], ptr addrspace(3) [[CMP_PTR:%.*]], ptr addrspace(4) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST_CMP:%.*]] = addrspacecast ptr addrspace(3) [[CMP_PTR]] to ptr addrspace(4) +; CHECK-NEXT: [[RET:%.*]] = cmpxchg ptr addrspace(3) [[CAS_PTR]], ptr addrspace(4) [[CAST_CMP]], ptr addrspace(4) [[VAL]] seq_cst monotonic, align 8 +; CHECK-NEXT: ret { ptr addrspace(4), i1 } [[RET]] +; +; SPV32-LABEL: define { ptr addrspace(4), i1 } @cmpxchg_group_to_flat_wrong_operand( +; SPV32-SAME: ptr addrspace(3) [[CAS_PTR:%.*]], ptr addrspace(3) [[CMP_PTR:%.*]], ptr addrspace(4) [[VAL:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[CAST_CMP:%.*]] = addrspacecast ptr addrspace(3) [[CMP_PTR]] to ptr addrspace(4) +; SPV32-NEXT: [[RET:%.*]] = cmpxchg ptr addrspace(3) [[CAS_PTR]], ptr addrspace(4) [[CAST_CMP]], ptr addrspace(4) [[VAL]] seq_cst monotonic, align 4 +; SPV32-NEXT: ret { ptr addrspace(4), i1 } [[RET]] +; +; SPV64-LABEL: define { ptr addrspace(4), i1 } @cmpxchg_group_to_flat_wrong_operand( +; SPV64-SAME: ptr addrspace(3) [[CAS_PTR:%.*]], ptr addrspace(3) [[CMP_PTR:%.*]], ptr addrspace(4) [[VAL:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[CAST_CMP:%.*]] = addrspacecast ptr addrspace(3) [[CMP_PTR]] to ptr addrspace(4) +; SPV64-NEXT: [[RET:%.*]] = cmpxchg ptr addrspace(3) [[CAS_PTR]], ptr addrspace(4) [[CAST_CMP]], ptr addrspace(4) [[VAL]] seq_cst monotonic, align 8 +; SPV64-NEXT: ret { ptr addrspace(4), i1 } [[RET]] +; %cast.cmp = addrspacecast ptr addrspace(3) %cmp.ptr to ptr addrspace(4) %ret = cmpxchg ptr addrspace(3) %cas.ptr, ptr addrspace(4) %cast.cmp, ptr addrspace(4) %val seq_cst monotonic ret { ptr addrspace(4), i1 } %ret @@ -194,13 +384,29 @@ define { ptr addrspace(4), i1 } @cmpxchg_group_to_flat_wrong_operand(ptr addrspa 
define void @local_nullptr(ptr addrspace(1) nocapture %results, ptr addrspace(3) %a) { ; CHECK-LABEL: define void @local_nullptr( -; CHECK-SAME: ptr addrspace(1) nocapture [[RESULTS:%.*]], ptr addrspace(3) [[A:%.*]]) { +; CHECK-SAME: ptr addrspace(1) captures(none) [[RESULTS:%.*]], ptr addrspace(3) [[A:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne ptr addrspace(3) [[A]], addrspacecast (ptr null to ptr addrspace(3)) ; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32 ; CHECK-NEXT: store i32 [[CONV]], ptr addrspace(1) [[RESULTS]], align 4 ; CHECK-NEXT: ret void ; +; SPV32-LABEL: define void @local_nullptr( +; SPV32-SAME: ptr addrspace(1) captures(none) [[RESULTS:%.*]], ptr addrspace(3) [[A:%.*]]) { +; SPV32-NEXT: [[ENTRY:.*:]] +; SPV32-NEXT: [[TOBOOL:%.*]] = icmp ne ptr addrspace(3) [[A]], addrspacecast (ptr null to ptr addrspace(3)) +; SPV32-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32 +; SPV32-NEXT: store i32 [[CONV]], ptr addrspace(1) [[RESULTS]], align 4 +; SPV32-NEXT: ret void +; +; SPV64-LABEL: define void @local_nullptr( +; SPV64-SAME: ptr addrspace(1) captures(none) [[RESULTS:%.*]], ptr addrspace(3) [[A:%.*]]) { +; SPV64-NEXT: [[ENTRY:.*:]] +; SPV64-NEXT: [[TOBOOL:%.*]] = icmp ne ptr addrspace(3) [[A]], addrspacecast (ptr null to ptr addrspace(3)) +; SPV64-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32 +; SPV64-NEXT: store i32 [[CONV]], ptr addrspace(1) [[RESULTS]], align 4 +; SPV64-NEXT: ret void +; entry: %tobool = icmp ne ptr addrspace(3) %a, addrspacecast (ptr null to ptr addrspace(3)) %conv = zext i1 %tobool to i32 @@ -213,6 +419,16 @@ define i32 @atomicrmw_add_global_to_flat_preserve_amdgpu_md(ptr addrspace(1) %gl ; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(1) [[GLOBAL_PTR]], i32 [[Y]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]] ; CHECK-NEXT: ret i32 [[RET]] +; +; 
SPV32-LABEL: define i32 @atomicrmw_add_global_to_flat_preserve_amdgpu_md( +; SPV32-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; SPV32-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(1) [[GLOBAL_PTR]], i32 [[Y]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]] +; SPV32-NEXT: ret i32 [[RET]] +; +; SPV64-LABEL: define i32 @atomicrmw_add_global_to_flat_preserve_amdgpu_md( +; SPV64-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] { +; SPV64-NEXT: [[RET:%.*]] = atomicrmw add ptr addrspace(1) [[GLOBAL_PTR]], i32 [[Y]] seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.no.remote.memory [[META0]] +; SPV64-NEXT: ret i32 [[RET]] ; %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr addrspace(4) %ret = atomicrmw add ptr addrspace(4) %cast, i32 %y seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.no.remote.memory !0 @@ -223,6 +439,14 @@ define ptr addrspace(4) @try_infer_getelementptr_constant_null() { ; CHECK-LABEL: define ptr addrspace(4) @try_infer_getelementptr_constant_null() { ; CHECK-NEXT: [[CE:%.*]] = getelementptr i8, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) null, i64 8), i64 0 ; CHECK-NEXT: ret ptr addrspace(4) [[CE]] +; +; SPV32-LABEL: define ptr addrspace(4) @try_infer_getelementptr_constant_null() { +; SPV32-NEXT: [[CE:%.*]] = getelementptr i8, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) null, i64 8), i64 0 +; SPV32-NEXT: ret ptr addrspace(4) [[CE]] +; +; SPV64-LABEL: define ptr addrspace(4) @try_infer_getelementptr_constant_null() { +; SPV64-NEXT: [[CE:%.*]] = getelementptr i8, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) null, i64 8), i64 0 +; SPV64-NEXT: ret ptr addrspace(4) [[CE]] ; %ce = getelementptr i8, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) null, i64 8), i64 0 ret ptr addrspace(4) %ce @@ -231,6 +455,9 @@ define ptr addrspace(4) 
@try_infer_getelementptr_constant_null() { attributes #0 = { nounwind } !0 = !{} -;. ; CHECK: [[META0]] = !{} ;. +; SPV32: [[META0]] = !{} +;. +; SPV64: [[META0]] = !{} +;. From 2bc152a42dd43577db9bfdf96551f6f6166b83c8 Mon Sep 17 00:00:00 2001 From: Alex Voicu Date: Mon, 10 Mar 2025 15:13:07 +0000 Subject: [PATCH 14/14] Do not use magic constant directly. --- llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index 8cd2f538d9bb5..b440870f354dc 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -108,18 +108,19 @@ enum AddressSpace { UniformConstant = storageClassToAddressSpace(SPIRV::StorageClass::UniformConstant), Workgroup = storageClassToAddressSpace(SPIRV::StorageClass::Workgroup), - Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic) + Generic = storageClassToAddressSpace(SPIRV::StorageClass::Generic), + Invalid = UINT32_MAX }; unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { // TODO: we only enable this for AMDGCN flavoured SPIR-V, where we know it to // be correct; this might be relaxed in the future. if (getTargetTriple().getVendor() != Triple::VendorType::AMD) - return UINT32_MAX; + return Invalid; const auto *LD = dyn_cast<LoadInst>(V); if (!LD) - return UINT32_MAX; + return Invalid; // It must be a load from a pointer to Generic. assert(V->getType()->isPointerTy() && @@ -127,7 +128,7 @@ unsigned SPIRVTargetMachine::getAssumedAddrSpace(const Value *V) const { const auto *Ptr = LD->getPointerOperand(); if (Ptr->getType()->getPointerAddressSpace() != AddressSpace::UniformConstant) - return UINT32_MAX; + return Invalid; // For a load from a pointer to UniformConstant, we can infer CrossWorkgroup // storage, as this could only have been legally initialised with a // CrossWorkgroup (aka device) constant pointer.