From abfcc87a4f47c09e4fd2fe7f68f1d5ce26dce678 Mon Sep 17 00:00:00 2001 From: skc7 Date: Thu, 7 Mar 2024 12:40:41 +0530 Subject: [PATCH] [AMDGPU] Introduce amdgpu-sw-lower-lds pass to lower LDS accesses. (#87265) --- llvm/lib/Target/AMDGPU/AMDGPU.h | 11 + llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp | 1335 +++++++++++++++++ .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 + llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 + ...-lower-lds-dynamic-indirect-access-asan.ll | 260 ++++ ...pu-sw-lower-lds-dynamic-indirect-access.ll | 142 ++ ...dgpu-sw-lower-lds-dynamic-lds-test-asan.ll | 113 ++ .../amdgpu-sw-lower-lds-dynamic-lds-test.ll | 87 ++ ...lti-static-dynamic-indirect-access-asan.ll | 385 +++++ ...ds-multi-static-dynamic-indirect-access.ll | 227 +++ ...w-lower-lds-multiple-blocks-return-asan.ll | 112 ++ ...gpu-sw-lower-lds-multiple-blocks-return.ll | 112 ++ ...lds-static-dynamic-indirect-access-asan.ll | 261 ++++ ...ower-lds-static-dynamic-indirect-access.ll | 143 ++ ...-lower-lds-static-dynamic-lds-test-asan.ll | 213 +++ ...pu-sw-lower-lds-static-dynamic-lds-test.ll | 120 ++ ...w-lower-lds-static-indirect-access-asan.ll | 224 +++ ...tic-indirect-access-function-param-asan.ll | 152 ++ ...s-static-indirect-access-function-param.ll | 102 ++ ...-lds-static-indirect-access-nested-asan.ll | 279 ++++ ...lower-lds-static-indirect-access-nested.ll | 279 ++++ ...gpu-sw-lower-lds-static-indirect-access.ll | 128 ++ ...mdgpu-sw-lower-lds-static-lds-test-asan.ll | 159 ++ ...lds-static-lds-test-atomic-cmpxchg-asan.ll | 132 ++ ...ower-lds-static-lds-test-atomicrmw-asan.ll | 214 +++ .../amdgpu-sw-lower-lds-static-lds-test.ll | 85 ++ 27 files changed, 5278 insertions(+) create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access.ll create mode 100644 
llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access-asan.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return-asan.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 
afb8f2d93f0f1..c50474893eb7d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -268,6 +268,17 @@ struct AMDGPUAlwaysInlinePass : PassInfoMixin { bool GlobalOpt; }; +void initializeAMDGPUSwLowerLDSLegacyPass(PassRegistry &); +extern char &AMDGPUSwLowerLDSLegacyPassID; +ModulePass * +createAMDGPUSwLowerLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr); + +struct AMDGPUSwLowerLDSPass : PassInfoMixin { + const AMDGPUTargetMachine &TM; + AMDGPUSwLowerLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + class AMDGPUCodeGenPreparePass : public PassInfoMixin { private: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 5c068b5695c8d..d8741b4b06a98 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -20,6 +20,7 @@ MODULE_PASS("amdgpu-always-inline", AMDGPUAlwaysInlinePass()) MODULE_PASS("amdgpu-lower-buffer-fat-pointers", AMDGPULowerBufferFatPointersPass(*this)) MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass()) +MODULE_PASS("amdgpu-sw-lower-lds", AMDGPUSwLowerLDSPass(*this)) MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this)) MODULE_PASS("amdgpu-perf-hint", AMDGPUPerfHintAnalysisPass( diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp new file mode 100644 index 0000000000000..d2ff7a154d0df --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp @@ -0,0 +1,1335 @@ +//===-- AMDGPUSwLowerLDS.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass lowers the local data store, LDS, uses in kernel and non-kernel +// functions in module to use dynamically allocated global memory. +// Packed LDS Layout is emulated in the global memory. +// The lowered memory instructions from LDS to global memory are then +// instrumented for address sanitizer, to catch addressing errors. +// This pass only works when address sanitizer has been enabled and has +// instrumented the IR. It identifies that IR has been instrumented using +// "nosanitize_address" module flag. +// +// Replacement of Kernel LDS accesses: +// For a kernel, LDS access can be static or dynamic which are direct +// (accessed within kernel) and indirect (accessed through non-kernels). +// All these LDS accesses corresponding to kernel will be packed together, +// where all static LDS accesses will be allocated first and then dynamic +// LDS follows. The total size with alignment is calculated. A new LDS global +// will be created for the kernel called "SW LDS" and it will have the +// attribute "amdgpu-lds-size" attached with value of the size calculated. +// All the LDS accesses in the module will be replaced by GEP with offset +// into the "SW LDS". +// A new "llvm.amdgcn.<kernel>.dynlds" is created per kernel accessing +// the dynamic LDS. This will be marked used by kernel and will have +// MD_absolute_symbol metadata set to total static LDS size, since dynamic +// LDS allocation starts after all static LDS allocation. +// +// A device global memory equal to the total LDS size will be allocated. +// At the prologue of the kernel, a single work-item from the +// work-group, does a "malloc" and stores the pointer of the +// allocation in "SW LDS". +// +// To store the offsets corresponding to all LDS accesses, another global +// variable is created which will be called "SW LDS metadata" in this pass. 
+// - SW LDS Global: +// It is LDS global of ptr type with name +// "llvm.amdgcn.sw.lds.<kernel>". +// - Metadata Global: +// It is of struct type, with n members. n equals the number of LDS +// globals accessed by the kernel (direct and indirect). Each member of +// struct is another struct of type {i32, i32, i32}. First member +// corresponds to offset, second member corresponds to size of LDS global +// being replaced and third represents the total aligned size. It will +// have name "llvm.amdgcn.sw.lds.<kernel>.md". This global will have +// an initializer with static LDS related offsets and sizes initialized. +// But for dynamic LDS related entries, offsets will be initialized to +// previous static LDS allocation end offset. Sizes for them will be zero +// initially. These dynamic LDS offset and size values will be updated +// within the kernel, since kernel can read the dynamic LDS size +// allocation done at runtime with query to "hidden_dynamic_lds_size" +// hidden kernel argument. +// +// At the epilogue of kernel, allocated memory would be made free by the same +// single work-item. +// +// Replacement of non-kernel LDS accesses: +// Multiple kernels can access the same non-kernel function. +// All the kernels accessing LDS through non-kernels are sorted and +// assigned a kernel-id. All the LDS globals accessed by non-kernels +// are sorted. This information is used to build two tables: +// - Base table: +// Base table will have single row, with elements of the row +// placed as per kernel ID. Each element in the row corresponds +// to ptr of "SW LDS" variable created for that kernel. +// - Offset table: +// Offset table will have multiple rows and columns. +// Rows are assumed to be from 0 to (n-1). n is total number +// of kernels accessing the LDS through non-kernels. +// Each row will have m elements. m is the total number of +// unique LDS globals accessed by all non-kernels. 
+// Each element in the row correspond to the ptr of +// the replacement of LDS global done by that particular kernel. +// A LDS variable in non-kernel will be replaced based on the information +// from base and offset tables. Based on kernel-id query, ptr of "SW +// LDS" for that corresponding kernel is obtained from base table. +// The Offset into the base "SW LDS" is obtained from +// corresponding element in offset table. With this information, replacement +// value is obtained. +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUAsanInstrumentation.h" +#include "AMDGPUMemoryUtils.h" +#include "AMDGPUTargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/ReplaceConstant.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +#include + +#define DEBUG_TYPE "amdgpu-sw-lower-lds" +#define COV5_HIDDEN_DYN_LDS_SIZE_ARG 15 + +using namespace llvm; +using namespace AMDGPU; + +namespace { + +cl::opt + AsanInstrumentLDS("amdgpu-asan-instrument-lds", + cl::desc("Run asan instrumentation on LDS instructions " + "lowered to global memory"), + cl::init(true), cl::Hidden); + +using DomTreeCallback = function_ref; + +struct LDSAccessTypeInfo { + SetVector StaticLDSGlobals; 
+ SetVector DynamicLDSGlobals; +}; + +// Struct to hold all the Metadata required for a kernel +// to replace a LDS global uses with corresponding offset +// in to device global memory. +struct KernelLDSParameters { + GlobalVariable *SwLDS = nullptr; + GlobalVariable *SwDynLDS = nullptr; + GlobalVariable *SwLDSMetadata = nullptr; + LDSAccessTypeInfo DirectAccess; + LDSAccessTypeInfo IndirectAccess; + DenseMap> + LDSToReplacementIndicesMap; + uint32_t MallocSize = 0; + uint32_t LDSSize = 0; + SmallVector, 64> RedzoneOffsetAndSizeVector; +}; + +// Struct to store information for creation of offset table +// for all the non-kernel LDS accesses. +struct NonKernelLDSParameters { + GlobalVariable *LDSBaseTable = nullptr; + GlobalVariable *LDSOffsetTable = nullptr; + SetVector OrderedKernels; + SetVector OrdereLDSGlobals; +}; + +struct AsanInstrumentInfo { + int Scale = 0; + uint32_t Offset = 0; + SetVector Instructions; +}; + +struct FunctionsAndLDSAccess { + DenseMap KernelToLDSParametersMap; + SetVector KernelsWithIndirectLDSAccess; + SetVector NonKernelsWithLDSArgument; + SetVector AllNonKernelLDSAccess; + FunctionVariableMap NonKernelToLDSAccessMap; +}; + +class AMDGPUSwLowerLDS { +public: + AMDGPUSwLowerLDS(Module &Mod, const AMDGPUTargetMachine &TM, + DomTreeCallback Callback) + : M(Mod), AMDGPUTM(TM), IRB(M.getContext()), DTCallback(Callback) {} + bool run(); + void getUsesOfLDSByNonKernels(); + void getNonKernelsWithLDSArguments(const CallGraph &CG); + SetVector + getOrderedIndirectLDSAccessingKernels(SetVector &Kernels); + SetVector + getOrderedNonKernelAllLDSGlobals(SetVector &Variables); + void buildSwLDSGlobal(Function *Func); + void buildSwDynLDSGlobal(Function *Func); + void populateSwMetadataGlobal(Function *Func); + void populateSwLDSAttributeAndMetadata(Function *Func); + void populateLDSToReplacementIndicesMap(Function *Func); + void getLDSMemoryInstructions(Function *Func, + SetVector &LDSInstructions); + void replaceKernelLDSAccesses(Function *Func); 
+ Value *getTranslatedGlobalMemoryGEPOfLDSPointer(Value *LoadMallocPtr, + Value *LDSPtr); + void translateLDSMemoryOperationsToGlobalMemory( + Function *Func, Value *LoadMallocPtr, + SetVector &LDSInstructions); + void poisonRedzones(Function *Func, Value *MallocPtr); + void lowerKernelLDSAccesses(Function *Func, DomTreeUpdater &DTU); + void buildNonKernelLDSOffsetTable(NonKernelLDSParameters &NKLDSParams); + void buildNonKernelLDSBaseTable(NonKernelLDSParameters &NKLDSParams); + Constant * + getAddressesOfVariablesInKernel(Function *Func, + SetVector &Variables); + void lowerNonKernelLDSAccesses(Function *Func, + SetVector &LDSGlobals, + NonKernelLDSParameters &NKLDSParams); + void + updateMallocSizeForDynamicLDS(Function *Func, Value **CurrMallocSize, + Value *HiddenDynLDSSize, + SetVector &DynamicLDSGlobals); + void initAsanInfo(); + +private: + Module &M; + const AMDGPUTargetMachine &AMDGPUTM; + IRBuilder<> IRB; + DomTreeCallback DTCallback; + FunctionsAndLDSAccess FuncLDSAccessInfo; + AsanInstrumentInfo AsanInfo; +}; + +template SetVector sortByName(std::vector &&V) { + // Sort the vector of globals or Functions based on their name. + // Returns a SetVector of globals/Functions. + sort(V, [](const auto *L, const auto *R) { + return L->getName() < R->getName(); + }); + return {SetVector(V.begin(), V.end())}; +} + +SetVector AMDGPUSwLowerLDS::getOrderedNonKernelAllLDSGlobals( + SetVector &Variables) { + // Sort all the non-kernel LDS accesses based on their name. + return sortByName( + std::vector(Variables.begin(), Variables.end())); +} + +SetVector AMDGPUSwLowerLDS::getOrderedIndirectLDSAccessingKernels( + SetVector &Kernels) { + // Sort the non-kernels accessing LDS based on their name. + // Also assign a kernel ID metadata based on the sorted order. 
+ LLVMContext &Ctx = M.getContext(); + if (Kernels.size() > UINT32_MAX) { + report_fatal_error("Unimplemented SW LDS lowering for > 2**32 kernels"); + } + SetVector OrderedKernels = + sortByName(std::vector(Kernels.begin(), Kernels.end())); + for (size_t i = 0; i < Kernels.size(); i++) { + Metadata *AttrMDArgs[1] = { + ConstantAsMetadata::get(IRB.getInt32(i)), + }; + Function *Func = OrderedKernels[i]; + Func->setMetadata("llvm.amdgcn.lds.kernel.id", + MDNode::get(Ctx, AttrMDArgs)); + } + return std::move(OrderedKernels); +} + +void AMDGPUSwLowerLDS::getNonKernelsWithLDSArguments(const CallGraph &CG) { + // Among the kernels accessing LDS, get list of + // Non-kernels to which a call is made and a ptr + // to addrspace(3) is passed as argument. + for (auto &K : FuncLDSAccessInfo.KernelToLDSParametersMap) { + Function *Func = K.first; + const CallGraphNode *CGN = CG[Func]; + if (!CGN) + continue; + for (auto &I : *CGN) { + CallGraphNode *CallerCGN = I.second; + Function *CalledFunc = CallerCGN->getFunction(); + if (!CalledFunc) + continue; + if (AMDGPU::isKernelLDS(CalledFunc)) + continue; + for (auto AI = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); + AI != E; ++AI) { + Type *ArgTy = (*AI).getType(); + if (!ArgTy->isPointerTy()) + continue; + if (ArgTy->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) + continue; + FuncLDSAccessInfo.NonKernelsWithLDSArgument.insert(CalledFunc); + // Also add the Calling function to KernelsWithIndirectLDSAccess list + // so that base table of LDS is generated. 
+ FuncLDSAccessInfo.KernelsWithIndirectLDSAccess.insert(Func); + } + } + } +} + +void AMDGPUSwLowerLDS::getUsesOfLDSByNonKernels() { + for (GlobalVariable *GV : FuncLDSAccessInfo.AllNonKernelLDSAccess) { + if (!AMDGPU::isLDSVariableToLower(*GV)) + continue; + + for (User *V : GV->users()) { + if (auto *I = dyn_cast(V)) { + Function *F = I->getFunction(); + if (!isKernelLDS(F) && F->hasFnAttribute(Attribute::SanitizeAddress)) + FuncLDSAccessInfo.NonKernelToLDSAccessMap[F].insert(GV); + } + } + } +} + +static void recordLDSAbsoluteAddress(Module &M, GlobalVariable *GV, + uint32_t Address) { + // Write the specified address into metadata where it can be retrieved by + // the assembler. Format is a half open range, [Address Address+1) + LLVMContext &Ctx = M.getContext(); + auto *IntTy = M.getDataLayout().getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS); + MDBuilder MDB(Ctx); + MDNode *MetadataNode = MDB.createRange(ConstantInt::get(IntTy, Address), + ConstantInt::get(IntTy, Address + 1)); + GV->setMetadata(LLVMContext::MD_absolute_symbol, MetadataNode); +} + +static void addLDSSizeAttribute(Function *Func, uint32_t Offset, + bool IsDynLDS) { + if (Offset != 0) { + std::string Buffer; + raw_string_ostream SS{Buffer}; + SS << Offset; + if (IsDynLDS) + SS << "," << Offset; + Func->addFnAttr("amdgpu-lds-size", Buffer); + } +} + +static void markUsedByKernel(Function *Func, GlobalVariable *SGV) { + BasicBlock *Entry = &Func->getEntryBlock(); + IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt()); + + Function *Decl = + Intrinsic::getDeclaration(Func->getParent(), Intrinsic::donothing, {}); + + Value *UseInstance[1] = { + Builder.CreateConstInBoundsGEP1_32(SGV->getValueType(), SGV, 0)}; + + Builder.CreateCall(Decl, {}, + {OperandBundleDefT("ExplicitUse", UseInstance)}); +} + +void AMDGPUSwLowerLDS::buildSwLDSGlobal(Function *Func) { + // Create new LDS global required for each kernel to store + // device global memory pointer. 
+ auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func]; + // Create new global pointer variable + LDSParams.SwLDS = new GlobalVariable( + M, IRB.getPtrTy(), false, GlobalValue::InternalLinkage, + PoisonValue::get(IRB.getPtrTy()), "llvm.amdgcn.sw.lds." + Func->getName(), + nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, false); + GlobalValue::SanitizerMetadata MD; + MD.NoAddress = true; + LDSParams.SwLDS->setSanitizerMetadata(MD); + return; +} + +void AMDGPUSwLowerLDS::buildSwDynLDSGlobal(Function *Func) { + // Create new Dyn LDS global if kernel accesses dyn LDS. + auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func]; + if (LDSParams.DirectAccess.DynamicLDSGlobals.empty() && + LDSParams.IndirectAccess.DynamicLDSGlobals.empty()) + return; + // Create new global pointer variable + auto emptyCharArray = ArrayType::get(IRB.getInt8Ty(), 0); + LDSParams.SwDynLDS = new GlobalVariable( + M, emptyCharArray, false, GlobalValue::ExternalLinkage, nullptr, + "llvm.amdgcn." + Func->getName() + ".dynlds", nullptr, + GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS, false); + markUsedByKernel(Func, LDSParams.SwDynLDS); + GlobalValue::SanitizerMetadata MD; + MD.NoAddress = true; + LDSParams.SwDynLDS->setSanitizerMetadata(MD); + return; +} + +void AMDGPUSwLowerLDS::populateSwLDSAttributeAndMetadata(Function *Func) { + auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func]; + bool IsDynLDSUsed = LDSParams.SwDynLDS ? true : false; + uint32_t Offset = LDSParams.LDSSize; + recordLDSAbsoluteAddress(M, LDSParams.SwLDS, 0); + addLDSSizeAttribute(Func, Offset, IsDynLDSUsed); + if (LDSParams.SwDynLDS) + recordLDSAbsoluteAddress(M, LDSParams.SwDynLDS, Offset); +} + +void AMDGPUSwLowerLDS::populateSwMetadataGlobal(Function *Func) { + // Create new metadata global for every kernel and initialize the + // start offsets and sizes corresponding to each LDS accesses. 
+ auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func]; + auto &Ctx = M.getContext(); + auto &DL = M.getDataLayout(); + std::vector Items; + Type *Int32Ty = IRB.getInt32Ty(); + std::vector Initializers; + Align MaxAlignment(1); + auto UpdateMaxAlignment = [&MaxAlignment, &DL](GlobalVariable *GV) { + Align GVAlign = AMDGPU::getAlign(DL, GV); + MaxAlignment = std::max(MaxAlignment, GVAlign); + }; + + for (GlobalVariable *GV : LDSParams.DirectAccess.StaticLDSGlobals) + UpdateMaxAlignment(GV); + + for (GlobalVariable *GV : LDSParams.DirectAccess.DynamicLDSGlobals) + UpdateMaxAlignment(GV); + + for (GlobalVariable *GV : LDSParams.IndirectAccess.StaticLDSGlobals) + UpdateMaxAlignment(GV); + + for (GlobalVariable *GV : LDSParams.IndirectAccess.DynamicLDSGlobals) + UpdateMaxAlignment(GV); + + //{StartOffset, AlignedSizeInBytes} + SmallString<128> MDItemStr; + raw_svector_ostream MDItemOS(MDItemStr); + MDItemOS << "llvm.amdgcn.sw.lds." << Func->getName() << ".md.item"; + + StructType *LDSItemTy = + StructType::create(Ctx, {Int32Ty, Int32Ty, Int32Ty}, MDItemOS.str()); + uint32_t &MallocSize = LDSParams.MallocSize; + SetVector UniqueLDSGlobals; + int AsanScale = AsanInfo.Scale; + auto buildInitializerForSwLDSMD = + [&](SetVector &LDSGlobals) { + for (auto &GV : LDSGlobals) { + if (is_contained(UniqueLDSGlobals, GV)) + continue; + UniqueLDSGlobals.insert(GV); + + Type *Ty = GV->getValueType(); + const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); + Items.push_back(LDSItemTy); + Constant *ItemStartOffset = ConstantInt::get(Int32Ty, MallocSize); + Constant *SizeInBytesConst = ConstantInt::get(Int32Ty, SizeInBytes); + // Get redzone size corresponding a size. + const uint64_t RightRedzoneSize = + AMDGPU::getRedzoneSizeForGlobal(AsanScale, SizeInBytes); + // Update MallocSize with current size and redzone size. 
+ MallocSize += SizeInBytes; + if (!AMDGPU::isDynamicLDS(*GV)) + LDSParams.RedzoneOffsetAndSizeVector.emplace_back(MallocSize, + RightRedzoneSize); + MallocSize += RightRedzoneSize; + // Align current size plus redzone. + uint64_t AlignedSize = + alignTo(SizeInBytes + RightRedzoneSize, MaxAlignment); + Constant *AlignedSizeInBytesConst = + ConstantInt::get(Int32Ty, AlignedSize); + // Align MallocSize + MallocSize = alignTo(MallocSize, MaxAlignment); + Constant *InitItem = + ConstantStruct::get(LDSItemTy, {ItemStartOffset, SizeInBytesConst, + AlignedSizeInBytesConst}); + Initializers.push_back(InitItem); + } + }; + SetVector SwLDSVector; + SwLDSVector.insert(LDSParams.SwLDS); + buildInitializerForSwLDSMD(SwLDSVector); + buildInitializerForSwLDSMD(LDSParams.DirectAccess.StaticLDSGlobals); + buildInitializerForSwLDSMD(LDSParams.IndirectAccess.StaticLDSGlobals); + buildInitializerForSwLDSMD(LDSParams.DirectAccess.DynamicLDSGlobals); + buildInitializerForSwLDSMD(LDSParams.IndirectAccess.DynamicLDSGlobals); + + // Update the LDS size used by the kernel. + Type *Ty = LDSParams.SwLDS->getValueType(); + const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); + uint64_t AlignedSize = alignTo(SizeInBytes, MaxAlignment); + LDSParams.LDSSize = AlignedSize; + SmallString<128> MDTypeStr; + raw_svector_ostream MDTypeOS(MDTypeStr); + MDTypeOS << "llvm.amdgcn.sw.lds." << Func->getName() << ".md.type"; + StructType *MetadataStructType = + StructType::create(Ctx, Items, MDTypeOS.str()); + SmallString<128> MDStr; + raw_svector_ostream MDOS(MDStr); + MDOS << "llvm.amdgcn.sw.lds." 
<< Func->getName() << ".md"; + LDSParams.SwLDSMetadata = new GlobalVariable( + M, MetadataStructType, false, GlobalValue::InternalLinkage, + PoisonValue::get(MetadataStructType), MDOS.str(), nullptr, + GlobalValue::NotThreadLocal, AMDGPUAS::GLOBAL_ADDRESS, false); + Constant *data = ConstantStruct::get(MetadataStructType, Initializers); + LDSParams.SwLDSMetadata->setInitializer(data); + assert(LDSParams.SwLDS); + // Set the alignment to MaxAlignment for SwLDS. + LDSParams.SwLDS->setAlignment(MaxAlignment); + if (LDSParams.SwDynLDS) + LDSParams.SwDynLDS->setAlignment(MaxAlignment); + GlobalValue::SanitizerMetadata MD; + MD.NoAddress = true; + LDSParams.SwLDSMetadata->setSanitizerMetadata(MD); + return; +} + +void AMDGPUSwLowerLDS::populateLDSToReplacementIndicesMap(Function *Func) { + // Fill the corresponding LDS replacement indices for each LDS access + // related to this kernel. + auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func]; + SetVector UniqueLDSGlobals; + auto PopulateIndices = [&](SetVector &LDSGlobals, + uint32_t &Idx) { + for (auto &GV : LDSGlobals) { + if (is_contained(UniqueLDSGlobals, GV)) + continue; + UniqueLDSGlobals.insert(GV); + LDSParams.LDSToReplacementIndicesMap[GV] = {0, Idx, 0}; + ++Idx; + } + }; + uint32_t Idx = 0; + SetVector SwLDSVector; + SwLDSVector.insert(LDSParams.SwLDS); + PopulateIndices(SwLDSVector, Idx); + PopulateIndices(LDSParams.DirectAccess.StaticLDSGlobals, Idx); + PopulateIndices(LDSParams.IndirectAccess.StaticLDSGlobals, Idx); + PopulateIndices(LDSParams.DirectAccess.DynamicLDSGlobals, Idx); + PopulateIndices(LDSParams.IndirectAccess.DynamicLDSGlobals, Idx); + return; +} + +static void replacesUsesOfGlobalInFunction(Function *Func, GlobalVariable *GV, + Value *Replacement) { + // Replace all uses of LDS global in this Function with a Replacement. 
+ auto ReplaceUsesLambda = [Func](const Use &U) -> bool { + auto *V = U.getUser(); + if (auto *Inst = dyn_cast(V)) { + auto *Func1 = Inst->getParent()->getParent(); + if (Func == Func1) + return true; + } + return false; + }; + GV->replaceUsesWithIf(Replacement, ReplaceUsesLambda); +} + +void AMDGPUSwLowerLDS::replaceKernelLDSAccesses(Function *Func) { + auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func]; + GlobalVariable *SwLDS = LDSParams.SwLDS; + assert(SwLDS); + GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata; + assert(SwLDSMetadata); + StructType *SwLDSMetadataStructType = + cast(SwLDSMetadata->getValueType()); + Type *Int32Ty = IRB.getInt32Ty(); + auto &IndirectAccess = LDSParams.IndirectAccess; + auto &DirectAccess = LDSParams.DirectAccess; + // Replace all uses of LDS global in this Function with a Replacement. + SetVector UniqueLDSGlobals; + auto ReplaceLDSGlobalUses = [&](SetVector &LDSGlobals) { + for (auto &GV : LDSGlobals) { + // Do not generate instructions if LDS access is in non-kernel + // i.e indirect-access. 
+ if ((IndirectAccess.StaticLDSGlobals.contains(GV) || + IndirectAccess.DynamicLDSGlobals.contains(GV)) && + (!DirectAccess.StaticLDSGlobals.contains(GV) && + !DirectAccess.DynamicLDSGlobals.contains(GV))) + continue; + if (is_contained(UniqueLDSGlobals, GV)) + continue; + UniqueLDSGlobals.insert(GV); + auto &Indices = LDSParams.LDSToReplacementIndicesMap[GV]; + assert(Indices.size() == 3); + Constant *GEPIdx[] = {ConstantInt::get(Int32Ty, Indices[0]), + ConstantInt::get(Int32Ty, Indices[1]), + ConstantInt::get(Int32Ty, Indices[2])}; + Constant *GEP = ConstantExpr::getGetElementPtr( + SwLDSMetadataStructType, SwLDSMetadata, GEPIdx, true); + Value *Offset = IRB.CreateLoad(Int32Ty, GEP); + Value *BasePlusOffset = + IRB.CreateInBoundsGEP(IRB.getInt8Ty(), SwLDS, {Offset}); + LLVM_DEBUG(GV->printAsOperand(dbgs() << "Sw LDS Lowering, Replacing LDS ", + false)); + replacesUsesOfGlobalInFunction(Func, GV, BasePlusOffset); + } + }; + ReplaceLDSGlobalUses(DirectAccess.StaticLDSGlobals); + ReplaceLDSGlobalUses(IndirectAccess.StaticLDSGlobals); + ReplaceLDSGlobalUses(DirectAccess.DynamicLDSGlobals); + ReplaceLDSGlobalUses(IndirectAccess.DynamicLDSGlobals); +} + +void AMDGPUSwLowerLDS::updateMallocSizeForDynamicLDS( + Function *Func, Value **CurrMallocSize, Value *HiddenDynLDSSize, + SetVector &DynamicLDSGlobals) { + auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func]; + Type *Int32Ty = IRB.getInt32Ty(); + + GlobalVariable *SwLDS = LDSParams.SwLDS; + GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata; + assert(SwLDS && SwLDSMetadata); + StructType *MetadataStructType = + cast(SwLDSMetadata->getValueType()); + unsigned MaxAlignment = SwLDS->getAlignment(); + Value *MaxAlignValue = IRB.getInt32(MaxAlignment); + Value *MaxAlignValueMinusOne = IRB.getInt32(MaxAlignment - 1); + + for (GlobalVariable *DynGV : DynamicLDSGlobals) { + auto &Indices = LDSParams.LDSToReplacementIndicesMap[DynGV]; + // Update the Offset metadata. 
+ Constant *Index0 = ConstantInt::get(Int32Ty, 0); + Constant *Index1 = ConstantInt::get(Int32Ty, Indices[1]); + + Constant *Index2Offset = ConstantInt::get(Int32Ty, 0); + auto *GEPForOffset = IRB.CreateInBoundsGEP( + MetadataStructType, SwLDSMetadata, {Index0, Index1, Index2Offset}); + + IRB.CreateStore(*CurrMallocSize, GEPForOffset); + // Update the size and Aligned Size metadata. + Constant *Index2Size = ConstantInt::get(Int32Ty, 1); + auto *GEPForSize = IRB.CreateInBoundsGEP(MetadataStructType, SwLDSMetadata, + {Index0, Index1, Index2Size}); + + Value *CurrDynLDSSize = IRB.CreateLoad(Int32Ty, HiddenDynLDSSize); + IRB.CreateStore(CurrDynLDSSize, GEPForSize); + Constant *Index2AlignedSize = ConstantInt::get(Int32Ty, 2); + auto *GEPForAlignedSize = IRB.CreateInBoundsGEP( + MetadataStructType, SwLDSMetadata, {Index0, Index1, Index2AlignedSize}); + + Value *AlignedDynLDSSize = + IRB.CreateAdd(CurrDynLDSSize, MaxAlignValueMinusOne); + AlignedDynLDSSize = IRB.CreateUDiv(AlignedDynLDSSize, MaxAlignValue); + AlignedDynLDSSize = IRB.CreateMul(AlignedDynLDSSize, MaxAlignValue); + IRB.CreateStore(AlignedDynLDSSize, GEPForAlignedSize); + + // Update the Current Malloc Size + *CurrMallocSize = IRB.CreateAdd(*CurrMallocSize, AlignedDynLDSSize); + } +} + +static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore, + DISubprogram *SP) { + assert(InsertBefore); + if (InsertBefore->getDebugLoc()) + return InsertBefore->getDebugLoc(); + if (SP) + return DILocation::get(SP->getContext(), SP->getLine(), 1, SP); + return DebugLoc(); +} + +void AMDGPUSwLowerLDS::getLDSMemoryInstructions( + Function *Func, SetVector &LDSInstructions) { + for (BasicBlock &BB : *Func) { + for (Instruction &Inst : BB) { + if (LoadInst *LI = dyn_cast(&Inst)) { + if (LI->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) + LDSInstructions.insert(&Inst); + } else if (StoreInst *SI = dyn_cast(&Inst)) { + if (SI->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) + LDSInstructions.insert(&Inst); + 
} else if (AtomicRMWInst *RMW = dyn_cast(&Inst)) {
+        if (RMW->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
+          LDSInstructions.insert(&Inst);
+      } else if (AtomicCmpXchgInst *XCHG = dyn_cast(&Inst)) {
+        if (XCHG->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
+          LDSInstructions.insert(&Inst);
+      } else
+        continue;
+    }
+  }
+}
+
+// Returns an inbounds i8 GEP off LoadMallocPtr whose index is LDSPtr
+// converted to a 32-bit integer, i.e. the LDS pointer value is used as a
+// byte offset from LoadMallocPtr.
+Value *
+AMDGPUSwLowerLDS::getTranslatedGlobalMemoryGEPOfLDSPointer(Value *LoadMallocPtr,
+                                                           Value *LDSPtr) {
+  assert(LDSPtr && "Invalid LDS pointer operand");
+  Value *PtrToInt = IRB.CreatePtrToInt(LDSPtr, IRB.getInt32Ty());
+  Value *GEP =
+      IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {PtrToInt});
+  return GEP;
+}
+
+// For every instruction in LDSInstructions (load, store, atomicrmw or
+// cmpxchg) builds an equivalent instruction whose pointer operand is the GEP
+// returned by getTranslatedGlobalMemoryGEPOfLDSPointer, carrying over
+// alignment, volatility, ordering and sync scope. The new instruction is
+// recorded in AsanInfo.Instructions and the original one is erased. Any other
+// instruction kind is reported as a fatal error.
+void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
+    Function *Func, Value *LoadMallocPtr,
+    SetVector<Instruction *> &LDSInstructions) {
+  LLVM_DEBUG(dbgs() << "Translating LDS memory operations to global memory : "
+                    << Func->getName());
+  for (Instruction *Inst : LDSInstructions) {
+    IRB.SetInsertPoint(Inst);
+    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+      Value *LIOperand = LI->getPointerOperand();
+      Value *Replacement =
+          getTranslatedGlobalMemoryGEPOfLDSPointer(LoadMallocPtr, LIOperand);
+      LoadInst *NewLI = IRB.CreateAlignedLoad(LI->getType(), Replacement,
+                                              LI->getAlign(), LI->isVolatile());
+      NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
+      AsanInfo.Instructions.insert(NewLI);
+      LI->replaceAllUsesWith(NewLI);
+      LI->eraseFromParent();
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+      Value *SIOperand = SI->getPointerOperand();
+      Value *Replacement =
+          getTranslatedGlobalMemoryGEPOfLDSPointer(LoadMallocPtr, SIOperand);
+      StoreInst *NewSI = IRB.CreateAlignedStore(
+          SI->getValueOperand(), Replacement, SI->getAlign(), SI->isVolatile());
+      NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
+      AsanInfo.Instructions.insert(NewSI);
+      SI->replaceAllUsesWith(NewSI);
+      SI->eraseFromParent();
+    } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
+      Value *RMWPtrOperand = RMW->getPointerOperand();
+      Value *RMWValOperand = RMW->getValOperand();
+      Value *Replacement = getTranslatedGlobalMemoryGEPOfLDSPointer(
+          LoadMallocPtr, RMWPtrOperand);
+      AtomicRMWInst *NewRMW = IRB.CreateAtomicRMW(
+          RMW->getOperation(), Replacement, RMWValOperand, RMW->getAlign(),
+          RMW->getOrdering(), RMW->getSyncScopeID());
+      NewRMW->setVolatile(RMW->isVolatile());
+      AsanInfo.Instructions.insert(NewRMW);
+      RMW->replaceAllUsesWith(NewRMW);
+      RMW->eraseFromParent();
+    } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+      Value *XCHGPtrOperand = XCHG->getPointerOperand();
+      Value *Replacement = getTranslatedGlobalMemoryGEPOfLDSPointer(
+          LoadMallocPtr, XCHGPtrOperand);
+      AtomicCmpXchgInst *NewXCHG = IRB.CreateAtomicCmpXchg(
+          Replacement, XCHG->getCompareOperand(), XCHG->getNewValOperand(),
+          XCHG->getAlign(), XCHG->getSuccessOrdering(),
+          XCHG->getFailureOrdering(), XCHG->getSyncScopeID());
+      NewXCHG->setVolatile(XCHG->isVolatile());
+      AsanInfo.Instructions.insert(NewXCHG);
+      XCHG->replaceAllUsesWith(NewXCHG);
+      XCHG->eraseFromParent();
+    } else
+      report_fatal_error("Unimplemented LDS lowering instruction");
+  }
+}
+
+// Emits one __asan_poison_region(address, size) call for every
+// (offset, size) pair in the kernel's RedzoneOffsetAndSizeVector, where
+// address is MallocPtr advanced by offset bytes.
+void AMDGPUSwLowerLDS::poisonRedzones(Function *Func, Value *MallocPtr) {
+  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
+  Type *Int64Ty = IRB.getInt64Ty();
+  Type *VoidTy = IRB.getVoidTy();
+  FunctionCallee AsanPoisonRegion = M.getOrInsertFunction(
+      "__asan_poison_region",
+      FunctionType::get(VoidTy, {Int64Ty, Int64Ty}, false));
+
+  // Bind by reference: the vector is only read here, no copy is needed.
+  const auto &RedzonesVec = LDSParams.RedzoneOffsetAndSizeVector;
+  size_t VecSize = RedzonesVec.size();
+  for (unsigned i = 0; i < VecSize; i++) {
+    auto &RedzonePair = RedzonesVec[i];
+    uint64_t RedzoneOffset = RedzonePair.first;
+    uint64_t RedzoneSize = RedzonePair.second;
+    Value *RedzoneAddrOffset = IRB.CreateInBoundsGEP(
+        IRB.getInt8Ty(), MallocPtr, {IRB.getInt64(RedzoneOffset)});
+    Value *RedzoneAddress = IRB.CreatePtrToInt(RedzoneAddrOffset, Int64Ty);
+    IRB.CreateCall(AsanPoisonRegion,
+                   {RedzoneAddress,
IRB.getInt64(RedzoneSize)});
+  }
+}
+
+// Rewrites kernel Func so that its LDS accesses go through a buffer obtained
+// from __asan_malloc_impl:
+//  * a new "WId" entry block sends work item {0,0,0} to a "Malloc" block
+//    that computes the allocation size, calls __asan_malloc_impl, stores the
+//    result into the kernel's SwLDS global and poisons the redzones;
+//  * after a barrier in the previous entry block, the pointer is loaded from
+//    SwLDS and the collected LDS memory operations are retargeted to it;
+//  * every return is redirected to "CondFree", where, after another barrier,
+//    the allocating work item calls __asan_free_impl in "Free" before all
+//    work items return from "End".
+void AMDGPUSwLowerLDS::lowerKernelLDSAccesses(Function *Func,
+                                              DomTreeUpdater &DTU) {
+  LLVM_DEBUG(dbgs() << "Sw Lowering Kernel LDS for : " << Func->getName());
+  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
+  auto &Ctx = M.getContext();
+  auto *PrevEntryBlock = &Func->getEntryBlock();
+  SetVector<Instruction *> LDSInstructions;
+  getLDSMemoryInstructions(Func, LDSInstructions);
+
+  // Create malloc block.
+  auto *MallocBlock = BasicBlock::Create(Ctx, "Malloc", Func, PrevEntryBlock);
+
+  // Create WIdBlock block which has instructions related to selection of
+  // {0,0,0} index work item in the work group.
+  auto *WIdBlock = BasicBlock::Create(Ctx, "WId", Func, MallocBlock);
+  IRB.SetInsertPoint(WIdBlock, WIdBlock->begin());
+  DebugLoc FirstDL =
+      getOrCreateDebugLoc(&*PrevEntryBlock->begin(), Func->getSubprogram());
+  IRB.SetCurrentDebugLocation(FirstDL);
+  Value *WIdx = IRB.CreateIntrinsic(Intrinsic::amdgcn_workitem_id_x, {}, {});
+  Value *WIdy = IRB.CreateIntrinsic(Intrinsic::amdgcn_workitem_id_y, {}, {});
+  Value *WIdz = IRB.CreateIntrinsic(Intrinsic::amdgcn_workitem_id_z, {}, {});
+  Value *XYOr = IRB.CreateOr(WIdx, WIdy);
+  Value *XYZOr = IRB.CreateOr(XYOr, WIdz);
+  Value *WIdzCond = IRB.CreateICmpEQ(XYZOr, IRB.getInt32(0));
+
+  // All work items will branch to PrevEntryBlock except {0,0,0} index
+  // work item which will branch to malloc block.
+  IRB.CreateCondBr(WIdzCond, MallocBlock, PrevEntryBlock);
+
+  // Malloc block
+  IRB.SetInsertPoint(MallocBlock, MallocBlock->begin());
+
+  // If Dynamic LDS globals are accessed by the kernel,
+  // Get the size of dyn lds from hidden dyn_lds_size kernel arg.
+  // Update the corresponding metadata global entries for this dyn lds global.
+  GlobalVariable *SwLDS = LDSParams.SwLDS;
+  GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata;
+  assert(SwLDS && SwLDSMetadata);
+  StructType *MetadataStructType =
+      cast<StructType>(SwLDSMetadata->getValueType());
+  uint32_t MallocSize = 0;
+  Value *CurrMallocSize;
+  Type *Int32Ty = IRB.getInt32Ty();
+  Type *Int64Ty = IRB.getInt64Ty();
+
+  SetVector<GlobalVariable *> UniqueLDSGlobals;
+  // SetVector::insert skips elements that are already present, so a range
+  // insert is enough to build the union without an explicit membership test.
+  auto GetUniqueLDSGlobals = [&](SetVector<GlobalVariable *> &LDSGlobals) {
+    UniqueLDSGlobals.insert(LDSGlobals.begin(), LDSGlobals.end());
+  };
+
+  GetUniqueLDSGlobals(LDSParams.DirectAccess.StaticLDSGlobals);
+  GetUniqueLDSGlobals(LDSParams.IndirectAccess.StaticLDSGlobals);
+  unsigned NumStaticLDS = 1 + UniqueLDSGlobals.size();
+  UniqueLDSGlobals.clear();
+
+  // NumStaticLDS is at least 1 because of the "1 +" above, so the else
+  // branch below only acts as a fallback.
+  if (NumStaticLDS) {
+    // The end of the static region is offset + aligned size (fields 0 and 2)
+    // of the last static entry in the metadata global.
+    auto *GEPForEndStaticLDSOffset =
+        IRB.CreateInBoundsGEP(MetadataStructType, SwLDSMetadata,
+                              {ConstantInt::get(Int32Ty, 0),
+                               ConstantInt::get(Int32Ty, NumStaticLDS - 1),
+                               ConstantInt::get(Int32Ty, 0)});
+
+    auto *GEPForEndStaticLDSSize =
+        IRB.CreateInBoundsGEP(MetadataStructType, SwLDSMetadata,
+                              {ConstantInt::get(Int32Ty, 0),
+                               ConstantInt::get(Int32Ty, NumStaticLDS - 1),
+                               ConstantInt::get(Int32Ty, 2)});
+
+    Value *EndStaticLDSOffset =
+        IRB.CreateLoad(Int32Ty, GEPForEndStaticLDSOffset);
+    Value *EndStaticLDSSize = IRB.CreateLoad(Int32Ty, GEPForEndStaticLDSSize);
+    CurrMallocSize = IRB.CreateAdd(EndStaticLDSOffset, EndStaticLDSSize);
+  } else
+    CurrMallocSize = IRB.getInt32(MallocSize);
+
+  if (LDSParams.SwDynLDS) {
+    if (!(AMDGPU::getAMDHSACodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5))
+      report_fatal_error(
+          "Dynamic LDS size query is only supported for CO V5 and later.");
+    // Get size from hidden dyn_lds_size argument of kernel
+    Value *ImplicitArg =
+        IRB.CreateIntrinsic(Intrinsic::amdgcn_implicitarg_ptr, {}, {});
+    Value *HiddenDynLDSSize = IRB.CreateInBoundsGEP(
+        ImplicitArg->getType(), ImplicitArg,
+        {ConstantInt::get(Int64Ty, COV5_HIDDEN_DYN_LDS_SIZE_ARG)});
+    UniqueLDSGlobals.clear();
+    GetUniqueLDSGlobals(LDSParams.DirectAccess.DynamicLDSGlobals);
+    GetUniqueLDSGlobals(LDSParams.IndirectAccess.DynamicLDSGlobals);
+    updateMallocSizeForDynamicLDS(Func, &CurrMallocSize, HiddenDynLDSSize,
+                                  UniqueLDSGlobals);
+  }
+
+  CurrMallocSize = IRB.CreateZExt(CurrMallocSize, Int64Ty);
+
+  // Create a call to malloc function which does device global memory allocation
+  // with size equals to all LDS global accesses size in this kernel.
+  Value *ReturnAddress =
+      IRB.CreateIntrinsic(Intrinsic::returnaddress, {}, {IRB.getInt32(0)});
+  FunctionCallee MallocFunc = M.getOrInsertFunction(
+      StringRef("__asan_malloc_impl"),
+      FunctionType::get(Int64Ty, {Int64Ty, Int64Ty}, false));
+  Value *RAPtrToInt = IRB.CreatePtrToInt(ReturnAddress, Int64Ty);
+  Value *MallocCall = IRB.CreateCall(MallocFunc, {CurrMallocSize, RAPtrToInt});
+
+  Value *MallocPtr =
+      IRB.CreateIntToPtr(MallocCall, IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS));
+
+  // Create store of malloc to new global
+  IRB.CreateStore(MallocPtr, SwLDS);
+
+  // Create calls to __asan_poison_region to poison redzones.
+  poisonRedzones(Func, MallocPtr);
+
+  // Create branch to PrevEntryBlock
+  IRB.CreateBr(PrevEntryBlock);
+
+  // Create work-group barrier at the start of the previous entry block.
+  // The phi records whether this work item came through the Malloc block.
+  Type *Int1Ty = IRB.getInt1Ty();
+  IRB.SetInsertPoint(PrevEntryBlock, PrevEntryBlock->begin());
+  auto *XYZCondPhi = IRB.CreatePHI(Int1Ty, 2, "xyzCond");
+  XYZCondPhi->addIncoming(IRB.getInt1(0), WIdBlock);
+  XYZCondPhi->addIncoming(IRB.getInt1(1), MallocBlock);
+
+  IRB.CreateIntrinsic(Intrinsic::amdgcn_s_barrier, {}, {});
+
+  // Load malloc pointer from Sw LDS.
+  Value *LoadMallocPtr =
+      IRB.CreateLoad(IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS), SwLDS);
+
+  // Replace All uses of LDS globals with new LDS pointers.
+  replaceKernelLDSAccesses(Func);
+
+  // Replace Memory Operations on LDS with corresponding
+  // global memory pointers.
+  translateLDSMemoryOperationsToGlobalMemory(Func, LoadMallocPtr,
+                                             LDSInstructions);
+
+  auto *CondFreeBlock = BasicBlock::Create(Ctx, "CondFree", Func);
+  auto *FreeBlock = BasicBlock::Create(Ctx, "Free", Func);
+  auto *EndBlock = BasicBlock::Create(Ctx, "End", Func);
+  // Redirect every existing return to CondFreeBlock. The three blocks created
+  // above are still empty at this point and are therefore skipped.
+  for (BasicBlock &BB : *Func) {
+    if (!BB.empty()) {
+      if (ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back())) {
+        RI->eraseFromParent();
+        IRB.SetInsertPoint(&BB, BB.end());
+        IRB.CreateBr(CondFreeBlock);
+      }
+    }
+  }
+
+  // Cond Free Block
+  IRB.SetInsertPoint(CondFreeBlock, CondFreeBlock->begin());
+  IRB.CreateIntrinsic(Intrinsic::amdgcn_s_barrier, {}, {});
+  IRB.CreateCondBr(XYZCondPhi, FreeBlock, EndBlock);
+
+  // Free Block
+  IRB.SetInsertPoint(FreeBlock, FreeBlock->begin());
+
+  // Free the previously allocated device global memory.
+  FunctionCallee AsanFreeFunc = M.getOrInsertFunction(
+      StringRef("__asan_free_impl"),
+      FunctionType::get(IRB.getVoidTy(), {Int64Ty, Int64Ty}, false));
+  Value *ReturnAddr = IRB.CreateCall(
+      Intrinsic::getDeclaration(&M, Intrinsic::returnaddress), IRB.getInt32(0));
+  Value *RAPToInt = IRB.CreatePtrToInt(ReturnAddr, Int64Ty);
+  Value *MallocPtrToInt = IRB.CreatePtrToInt(LoadMallocPtr, Int64Ty);
+  IRB.CreateCall(AsanFreeFunc, {MallocPtrToInt, RAPToInt});
+
+  IRB.CreateBr(EndBlock);
+
+  // End Block
+  IRB.SetInsertPoint(EndBlock, EndBlock->begin());
+  IRB.CreateRetVoid();
+  // Update the DomTree with corresponding links to basic blocks.
+  // NOTE(review): the edges WId -> PrevEntryBlock, CondFree -> End and the
+  // rewritten return blocks -> CondFree created above are not reported here;
+  // confirm whether that is intentional.
+  DTU.applyUpdates({{DominatorTree::Insert, WIdBlock, MallocBlock},
+                    {DominatorTree::Insert, MallocBlock, PrevEntryBlock},
+                    {DominatorTree::Insert, CondFreeBlock, FreeBlock},
+                    {DominatorTree::Insert, FreeBlock, EndBlock}});
+}
+
+// Builds one row of the offset table for kernel Func: for each variable in
+// Variables, a constant GEP into the kernel's SwLDSMetadata global selected
+// by the variable's entry in LDSToReplacementIndicesMap, or poison when the
+// kernel has no entry for that variable.
+Constant *AMDGPUSwLowerLDS::getAddressesOfVariablesInKernel(
+    Function *Func, SetVector<GlobalVariable *> &Variables) {
+  Type *Int32Ty = IRB.getInt32Ty();
+  auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
+
+  GlobalVariable *SwLDSMetadata = LDSParams.SwLDSMetadata;
+  assert(SwLDSMetadata);
+  auto *SwLDSMetadataStructType =
+      cast<StructType>(SwLDSMetadata->getValueType());
+  ArrayType *KernelOffsetsType =
+      ArrayType::get(IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS), Variables.size());
+
+  SmallVector<Constant *> Elements;
+  for (size_t i = 0; i < Variables.size(); i++) {
+    GlobalVariable *GV = Variables[i];
+    if (!LDSParams.LDSToReplacementIndicesMap.contains(GV)) {
+      Elements.push_back(
+          PoisonValue::get(IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS)));
+      continue;
+    }
+    auto &Indices = LDSParams.LDSToReplacementIndicesMap[GV];
+    Constant *GEPIdx[] = {ConstantInt::get(Int32Ty, Indices[0]),
+                          ConstantInt::get(Int32Ty, Indices[1]),
+                          ConstantInt::get(Int32Ty, Indices[2])};
+    Constant *GEP = ConstantExpr::getGetElementPtr(SwLDSMetadataStructType,
+                                                   SwLDSMetadata, GEPIdx, true);
+    Elements.push_back(GEP);
+  }
+  return ConstantArray::get(KernelOffsetsType, Elements);
+}
+
+void AMDGPUSwLowerLDS::buildNonKernelLDSBaseTable(
+    NonKernelLDSParameters &NKLDSParams) {
+  // Base table will have single row, with elements of the row
+  // placed as per kernel ID. Each element in the row corresponds
+  // to address of "SW LDS" global of the kernel.
+  auto &Kernels = NKLDSParams.OrderedKernels;
+  if (Kernels.empty())
+    return;
+  Type *Int32Ty = IRB.getInt32Ty();
+  const size_t NumberKernels = Kernels.size();
+  ArrayType *AllKernelsOffsetsType =
+      ArrayType::get(IRB.getPtrTy(AMDGPUAS::LOCAL_ADDRESS), NumberKernels);
+  std::vector<Constant *> OverallConstantExprElts(NumberKernels);
+  for (size_t i = 0; i < NumberKernels; i++) {
+    Function *Func = Kernels[i];
+    auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
+    GlobalVariable *SwLDS = LDSParams.SwLDS;
+    assert(SwLDS);
+    Constant *GEPIdx[] = {ConstantInt::get(Int32Ty, 0)};
+    Constant *GEP =
+        ConstantExpr::getGetElementPtr(SwLDS->getType(), SwLDS, GEPIdx, true);
+    OverallConstantExprElts[i] = GEP;
+  }
+  Constant *init =
+      ConstantArray::get(AllKernelsOffsetsType, OverallConstantExprElts);
+  NKLDSParams.LDSBaseTable = new GlobalVariable(
+      M, AllKernelsOffsetsType, true, GlobalValue::InternalLinkage, init,
+      "llvm.amdgcn.sw.lds.base.table", nullptr, GlobalValue::NotThreadLocal,
+      AMDGPUAS::GLOBAL_ADDRESS);
+  // The table itself is excluded from address sanitizer instrumentation.
+  GlobalValue::SanitizerMetadata MD;
+  MD.NoAddress = true;
+  NKLDSParams.LDSBaseTable->setSanitizerMetadata(MD);
+}
+
+void AMDGPUSwLowerLDS::buildNonKernelLDSOffsetTable(
+    NonKernelLDSParameters &NKLDSParams) {
+  // Offset table will have multiple rows and columns.
+  // Rows are assumed to be from 0 to (n-1). n is total number
+  // of kernels accessing the LDS through non-kernels.
+  // Each row will have m elements. m is the total number of
+  // unique LDS globals accessed by non-kernels.
+  // Each element in the row corresponds to the address of
+  // the replacement of LDS global done by that particular kernel.
+  auto &Variables = NKLDSParams.OrdereLDSGlobals;
+  auto &Kernels = NKLDSParams.OrderedKernels;
+  if (Variables.empty() || Kernels.empty())
+    return;
+  const size_t NumberVariables = Variables.size();
+  const size_t NumberKernels = Kernels.size();
+
+  ArrayType *KernelOffsetsType =
+      ArrayType::get(IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS), NumberVariables);
+
+  ArrayType *AllKernelsOffsetsType =
+      ArrayType::get(KernelOffsetsType, NumberKernels);
+  std::vector<Constant *> overallConstantExprElts(NumberKernels);
+  for (size_t i = 0; i < NumberKernels; i++) {
+    Function *Func = Kernels[i];
+    overallConstantExprElts[i] =
+        getAddressesOfVariablesInKernel(Func, Variables);
+  }
+  Constant *Init =
+      ConstantArray::get(AllKernelsOffsetsType, overallConstantExprElts);
+  NKLDSParams.LDSOffsetTable = new GlobalVariable(
+      M, AllKernelsOffsetsType, true, GlobalValue::InternalLinkage, Init,
+      "llvm.amdgcn.sw.lds.offset.table", nullptr, GlobalValue::NotThreadLocal,
+      AMDGPUAS::GLOBAL_ADDRESS);
+  GlobalValue::SanitizerMetadata MD;
+  MD.NoAddress = true;
+  NKLDSParams.LDSOffsetTable->setSanitizerMetadata(MD);
+}
+
+void AMDGPUSwLowerLDS::lowerNonKernelLDSAccesses(
+    Function *Func, SetVector<GlobalVariable *> &LDSGlobals,
+    NonKernelLDSParameters &NKLDSParams) {
+  // Replace LDS access in non-kernel with replacement queried from
+  // Base table and offset from offset table.
+  LLVM_DEBUG(dbgs() << "Sw LDS lowering, lower non-kernel access for : "
+                    << Func->getName());
+  auto InsertAt = Func->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
+  IRB.SetInsertPoint(InsertAt);
+
+  // Get LDS memory instructions.
+  SetVector<Instruction *> LDSInstructions;
+  getLDSMemoryInstructions(Func, LDSInstructions);
+
+  Function *Decl =
+      Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_lds_kernel_id, {});
+  auto *KernelId = IRB.CreateCall(Decl, {});
+  GlobalVariable *LDSBaseTable = NKLDSParams.LDSBaseTable;
+  GlobalVariable *LDSOffsetTable = NKLDSParams.LDSOffsetTable;
+  auto &OrdereLDSGlobals = NKLDSParams.OrdereLDSGlobals;
+  // Base table entry for the calling kernel: an LDS pointer, which in turn
+  // is loaded to obtain the global pointer used for the translated accesses.
+  Value *BaseGEP = IRB.CreateInBoundsGEP(
+      LDSBaseTable->getValueType(), LDSBaseTable, {IRB.getInt32(0), KernelId});
+  Value *BaseLoad =
+      IRB.CreateLoad(IRB.getPtrTy(AMDGPUAS::LOCAL_ADDRESS), BaseGEP);
+  Value *LoadMallocPtr =
+      IRB.CreateLoad(IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS), BaseLoad);
+
+  for (GlobalVariable *GV : LDSGlobals) {
+    // The column of GV in the offset table is its position in the ordered
+    // list of all LDS globals accessed by non-kernels.
+    auto GVIt = std::find(OrdereLDSGlobals.begin(), OrdereLDSGlobals.end(), GV);
+    assert(GVIt != OrdereLDSGlobals.end());
+    uint32_t GVOffset = std::distance(OrdereLDSGlobals.begin(), GVIt);
+
+    Value *OffsetGEP = IRB.CreateInBoundsGEP(
+        LDSOffsetTable->getValueType(), LDSOffsetTable,
+        {IRB.getInt32(0), KernelId, IRB.getInt32(GVOffset)});
+    Value *OffsetLoad =
+        IRB.CreateLoad(IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS), OffsetGEP);
+    Value *Offset = IRB.CreateLoad(IRB.getInt32Ty(), OffsetLoad);
+    Value *BasePlusOffset =
+        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), BaseLoad, {Offset});
+    LLVM_DEBUG(dbgs() << "Sw LDS Lowering, Replace non-kernel LDS for "
+                      << GV->getName());
+    replacesUsesOfGlobalInFunction(Func, GV, BasePlusOffset);
+  }
+  translateLDSMemoryOperationsToGlobalMemory(Func, LoadMallocPtr,
+                                             LDSInstructions);
+}
+
+static void reorderStaticDynamicIndirectLDSSet(KernelLDSParameters &LDSParams) {
+  // Sort Static, dynamic LDS globals which are either
+  // direct or indirect access on basis of name.
+  auto &DirectAccess = LDSParams.DirectAccess;
+  auto &IndirectAccess = LDSParams.IndirectAccess;
+  LDSParams.DirectAccess.StaticLDSGlobals = sortByName(
+      std::vector<GlobalVariable *>(DirectAccess.StaticLDSGlobals.begin(),
+                                    DirectAccess.StaticLDSGlobals.end()));
+  LDSParams.DirectAccess.DynamicLDSGlobals = sortByName(
+      std::vector<GlobalVariable *>(DirectAccess.DynamicLDSGlobals.begin(),
+                                    DirectAccess.DynamicLDSGlobals.end()));
+  LDSParams.IndirectAccess.StaticLDSGlobals = sortByName(
+      std::vector<GlobalVariable *>(IndirectAccess.StaticLDSGlobals.begin(),
+                                    IndirectAccess.StaticLDSGlobals.end()));
+  LDSParams.IndirectAccess.DynamicLDSGlobals = sortByName(
+      std::vector<GlobalVariable *>(IndirectAccess.DynamicLDSGlobals.begin(),
+                                    IndirectAccess.DynamicLDSGlobals.end()));
+}
+
+// Queries the address sanitizer shadow mapping parameters for the target
+// triple and caches scale and offset in AsanInfo.
+void AMDGPUSwLowerLDS::initAsanInfo() {
+  // Get Shadow mapping scale and offset.
+  unsigned LongSize =
+      M.getDataLayout().getPointerSizeInBits(AMDGPUAS::GLOBAL_ADDRESS);
+  uint64_t Offset;
+  int Scale;
+  bool OrShadowOffset;
+  llvm::getAddressSanitizerParams(Triple(AMDGPUTM.getTargetTriple()), LongSize,
+                                  false, &Offset, &Scale, &OrShadowOffset);
+  AsanInfo.Scale = Scale;
+  AsanInfo.Offset = Offset;
+}
+
+// Driver of the pass. Collects direct and indirect LDS uses per
+// sanitize_address kernel, lowers each such kernel, lowers the non-kernel
+// functions that access LDS or receive LDS pointers, erases LDS globals that
+// became unused and, if AsanInstrumentLDS is set, instruments the translated
+// memory operations. Returns true if the module was modified.
+bool AMDGPUSwLowerLDS::run() {
+  bool Changed = false;
+
+  CallGraph CG = CallGraph(M);
+
+  Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M);
+
+  // Get all the direct and indirect access of LDS for all the kernels.
+  LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
+
+  // Utility to group LDS access into direct, indirect, static and dynamic.
+  auto PopulateKernelStaticDynamicLDS = [&](FunctionVariableMap &LDSAccesses,
+                                            bool DirectAccess) {
+    for (auto &K : LDSAccesses) {
+      Function *F = K.first;
+      if (!F || K.second.empty())
+        continue;
+
+      assert(isKernelLDS(F));
+      if (!F->hasFnAttribute(Attribute::SanitizeAddress))
+        continue;
+
+      // Only inserts if key isn't already in the map.
+      FuncLDSAccessInfo.KernelToLDSParametersMap.insert(
+          {F, KernelLDSParameters()});
+
+      auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[F];
+      if (!DirectAccess)
+        FuncLDSAccessInfo.KernelsWithIndirectLDSAccess.insert(F);
+      for (GlobalVariable *GV : K.second) {
+        if (!DirectAccess) {
+          if (AMDGPU::isDynamicLDS(*GV))
+            LDSParams.IndirectAccess.DynamicLDSGlobals.insert(GV);
+          else
+            LDSParams.IndirectAccess.StaticLDSGlobals.insert(GV);
+          FuncLDSAccessInfo.AllNonKernelLDSAccess.insert(GV);
+        } else {
+          if (AMDGPU::isDynamicLDS(*GV))
+            LDSParams.DirectAccess.DynamicLDSGlobals.insert(GV);
+          else
+            LDSParams.DirectAccess.StaticLDSGlobals.insert(GV);
+        }
+      }
+    }
+  };
+
+  PopulateKernelStaticDynamicLDS(LDSUsesInfo.direct_access, true);
+  PopulateKernelStaticDynamicLDS(LDSUsesInfo.indirect_access, false);
+
+  // Get address sanitizer scale.
+  initAsanInfo();
+
+  for (auto &K : FuncLDSAccessInfo.KernelToLDSParametersMap) {
+    Function *Func = K.first;
+    auto &LDSParams = FuncLDSAccessInfo.KernelToLDSParametersMap[Func];
+    if (LDSParams.DirectAccess.StaticLDSGlobals.empty() &&
+        LDSParams.DirectAccess.DynamicLDSGlobals.empty() &&
+        LDSParams.IndirectAccess.StaticLDSGlobals.empty() &&
+        LDSParams.IndirectAccess.DynamicLDSGlobals.empty()) {
+      // Nothing to lower for this kernel. Changed must not be reset here:
+      // that would discard modifications already made to the module, either
+      // above or while lowering an earlier kernel in this loop.
+      continue;
+    }
+    removeFnAttrFromReachable(CG, Func,
+                              {"amdgpu-no-workitem-id-x",
+                               "amdgpu-no-workitem-id-y",
+                               "amdgpu-no-workitem-id-z"});
+    reorderStaticDynamicIndirectLDSSet(LDSParams);
+    buildSwLDSGlobal(Func);
+    buildSwDynLDSGlobal(Func);
+    populateSwMetadataGlobal(Func);
+    populateSwLDSAttributeAndMetadata(Func);
+    populateLDSToReplacementIndicesMap(Func);
+    DomTreeUpdater DTU(DTCallback(*Func),
+                       DomTreeUpdater::UpdateStrategy::Lazy);
+    lowerKernelLDSAccesses(Func, DTU);
+    Changed = true;
+  }
+
+  // Get the Uses of LDS from non-kernels.
+  getUsesOfLDSByNonKernels();
+
+  // Get non-kernels with LDS ptr as argument and called by kernels.
+  getNonKernelsWithLDSArguments(CG);
+
+  if (!FuncLDSAccessInfo.NonKernelToLDSAccessMap.empty() ||
+      !FuncLDSAccessInfo.NonKernelsWithLDSArgument.empty()) {
+    NonKernelLDSParameters NKLDSParams;
+    NKLDSParams.OrderedKernels = getOrderedIndirectLDSAccessingKernels(
+        FuncLDSAccessInfo.KernelsWithIndirectLDSAccess);
+    NKLDSParams.OrdereLDSGlobals = getOrderedNonKernelAllLDSGlobals(
+        FuncLDSAccessInfo.AllNonKernelLDSAccess);
+    buildNonKernelLDSBaseTable(NKLDSParams);
+    buildNonKernelLDSOffsetTable(NKLDSParams);
+    for (auto &K : FuncLDSAccessInfo.NonKernelToLDSAccessMap) {
+      Function *Func = K.first;
+      DenseSet<GlobalVariable *> &LDSGlobals = K.second;
+      SetVector<GlobalVariable *> OrderedLDSGlobals = sortByName(
+          std::vector<GlobalVariable *>(LDSGlobals.begin(), LDSGlobals.end()));
+      lowerNonKernelLDSAccesses(Func, OrderedLDSGlobals, NKLDSParams);
+    }
+    // Non-kernels that only receive LDS pointers as arguments have no LDS
+    // globals of their own to replace; only their memory operations are
+    // translated.
+    for (Function *Func : FuncLDSAccessInfo.NonKernelsWithLDSArgument) {
+      auto &K = FuncLDSAccessInfo.NonKernelToLDSAccessMap;
+      if (K.find(Func) != K.end())
+        continue;
+      SetVector<GlobalVariable *> Vec;
+      lowerNonKernelLDSAccesses(Func, Vec, NKLDSParams);
+    }
+    Changed = true;
+  }
+
+  if (!Changed)
+    return Changed;
+
+  for (auto &GV : make_early_inc_range(M.globals())) {
+    if (AMDGPU::isLDSVariableToLower(GV)) {
+      // probably want to remove from used lists
+      GV.removeDeadConstantUsers();
+      if (GV.use_empty())
+        GV.eraseFromParent();
+    }
+  }
+
+  if (AsanInstrumentLDS) {
+    // Collect all operands first; instrumentAddress() is only called once
+    // the list is complete.
+    SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
+    for (Instruction *Inst : AsanInfo.Instructions) {
+      SmallVector<InterestingMemoryOperand, 1> InterestingOperands;
+      getInterestingMemoryOperands(M, Inst, InterestingOperands);
+      for (auto &Operand : InterestingOperands) {
+        OperandsToInstrument.push_back(Operand);
+      }
+    }
+    for (auto &Operand : OperandsToInstrument) {
+      Value *Addr = Operand.getPtr();
+      instrumentAddress(M, IRB, Operand.getInsn(), Operand.getInsn(), Addr,
+                        Operand.Alignment.valueOrOne(), Operand.TypeStoreSize,
+                        Operand.IsWrite, nullptr, false, false, AsanInfo.Scale,
+                        AsanInfo.Offset);
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+class
AMDGPUSwLowerLDSLegacy : public ModulePass {
+public:
+  const AMDGPUTargetMachine *AMDGPUTM;
+  static char ID;
+  AMDGPUSwLowerLDSLegacy(const AMDGPUTargetMachine *TM)
+      : ModulePass(ID), AMDGPUTM(TM) {
+    initializeAMDGPUSwLowerLDSLegacyPass(*PassRegistry::getPassRegistry());
+  }
+  bool runOnModule(Module &M) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // runOnModule() queries TargetPassConfig when no target machine was
+    // supplied, so it has to be declared as required in that case.
+    if (!AMDGPUTM)
+      AU.addRequired<TargetPassConfig>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
+};
+} // namespace
+
+char AMDGPUSwLowerLDSLegacy::ID = 0;
+char &llvm::AMDGPUSwLowerLDSLegacyPassID = AMDGPUSwLowerLDSLegacy::ID;
+
+INITIALIZE_PASS_BEGIN(AMDGPUSwLowerLDSLegacy, "amdgpu-sw-lower-lds",
+                      "AMDGPU Software lowering of LDS", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(AMDGPUSwLowerLDSLegacy, "amdgpu-sw-lower-lds",
+                    "AMDGPU Software lowering of LDS", false, false)
+
+bool AMDGPUSwLowerLDSLegacy::runOnModule(Module &M) {
+  // AddressSanitizer pass adds "nosanitize_address" module flag if it has
+  // instrumented the IR. Return early if the flag is not present.
+  if (!M.getModuleFlag("nosanitize_address"))
+    return false;
+  DominatorTreeWrapperPass *const DTW =
+      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  auto DTCallback = [&DTW](Function &F) -> DominatorTree * {
+    return DTW ? &DTW->getDomTree() : nullptr;
+  };
+  // Fall back to the target machine of the pass pipeline when none was
+  // passed to the constructor.
+  if (!AMDGPUTM) {
+    auto &TPC = getAnalysis<TargetPassConfig>();
+    AMDGPUTM = &TPC.getTM<AMDGPUTargetMachine>();
+  }
+  AMDGPUSwLowerLDS SwLowerLDSImpl(M, *AMDGPUTM, DTCallback);
+  bool IsChanged = SwLowerLDSImpl.run();
+  return IsChanged;
+}
+
+ModulePass *
+llvm::createAMDGPUSwLowerLDSLegacyPass(const AMDGPUTargetMachine *TM) {
+  return new AMDGPUSwLowerLDSLegacy(TM);
+}
+
+PreservedAnalyses AMDGPUSwLowerLDSPass::run(Module &M,
+                                            ModuleAnalysisManager &AM) {
+  // AddressSanitizer pass adds "nosanitize_address" module flag if it has
+  // instrumented the IR. Return early if the flag is not present.
+  if (!M.getModuleFlag("nosanitize_address"))
+    return PreservedAnalyses::all();
+  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  auto DTCallback = [&FAM](Function &F) -> DominatorTree * {
+    return &FAM.getResult<DominatorTreeAnalysis>(F);
+  };
+  AMDGPUSwLowerLDS SwLowerLDSImpl(M, TM, DTCallback);
+  bool IsChanged = SwLowerLDSImpl.run();
+  if (!IsChanged)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserve<DominatorTreeAnalysis>();
+  return PA;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7ac7b3315bb97..49beb03488e7c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -412,6 +412,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeSILoadStoreOptimizerPass(*PR);
   initializeAMDGPUCtorDtorLoweringLegacyPass(*PR);
   initializeAMDGPUAlwaysInlinePass(*PR);
+  initializeAMDGPUSwLowerLDSLegacyPass(*PR);
   initializeAMDGPUAttributorLegacyPass(*PR);
   initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
   initializeAMDGPUAnnotateUniformValuesLegacyPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 9ed7981b3da5a..e7aa97bb1e528 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -74,6 +74,7 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPULowerKernelArguments.cpp
   AMDGPULowerKernelAttributes.cpp
   AMDGPULowerModuleLDSPass.cpp
+  AMDGPUSwLowerLDS.cpp
   AMDGPUMachineFunction.cpp
   AMDGPUMachineModuleInfo.cpp
   AMDGPUMacroFusion.cpp
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll
new file mode 100644
index 0000000000000..2776b9187724c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access-asan.ll
@@ -0,0 +1,260 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all
--version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check indirect dynamic LDS access through a non-kernel from kernel is lowered correctly. +@lds_1 = internal addrspace(3) global [1 x i8] poison, align 1 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 2 +@lds_3 = external addrspace(3) global [0 x i8], align 4 +@lds_4 = external addrspace(3) global [0 x i8], align 8 + +;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address +; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address +;. 
+define void @use_variables() sanitize_address { +; CHECK-LABEL: define void @use_variables( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP47:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = lshr i64 [[TMP47]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 2147450880 +; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr +; CHECK-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP19]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP47]], 7 +; CHECK-NEXT: [[TMP22:%.*]] = trunc 
i64 [[TMP21]] to i8 +; CHECK-NEXT: [[TMP23:%.*]] = icmp sge i8 [[TMP22]], [[TMP19]] +; CHECK-NEXT: [[TMP24:%.*]] = and i1 [[TMP20]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP24]]) +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i64 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[TMP26]], label [[ASAN_REPORT:%.*]], label [[TMP29:%.*]], !prof [[PROF3:![0-9]+]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP24]], label [[TMP27:%.*]], label [[TMP28:%.*]] +; CHECK: 27: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP47]]) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP28]] +; CHECK: 28: +; CHECK-NEXT: br label [[TMP29]] +; CHECK: 29: +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP14]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: [[TMP33:%.*]] = lshr i64 [[TMP32]], 3 +; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[TMP33]], 2147450880 +; CHECK-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP34]] to ptr +; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP35]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i8 [[TMP36]], 0 +; CHECK-NEXT: [[TMP38:%.*]] = and i64 [[TMP32]], 7 +; CHECK-NEXT: [[TMP39:%.*]] = trunc i64 [[TMP38]] to i8 +; CHECK-NEXT: [[TMP40:%.*]] = icmp sge i8 [[TMP39]], [[TMP36]] +; CHECK-NEXT: [[TMP41:%.*]] = and i1 [[TMP37]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i64 [[TMP42]], 0 +; CHECK-NEXT: br i1 [[TMP43]], label [[ASAN_REPORT1:%.*]], label [[TMP46:%.*]], !prof [[PROF3]] +; CHECK: asan.report1: +; CHECK-NEXT: br i1 [[TMP41]], label [[TMP44:%.*]], label [[TMP45:%.*]] +; CHECK: 44: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7]] +; CHECK-NEXT: call void 
@llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP45]] +; CHECK: 45: +; CHECK-NEXT: br label [[TMP46]] +; CHECK: 46: +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP31]], align 8 +; CHECK-NEXT: ret void +; + store i8 3, ptr addrspace(3) @lds_3, align 4 + store i8 3, ptr addrspace(3) @lds_4, align 8 + ret void +} + +define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr 
addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 7 +; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 8 +; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], [[TMP14]] +; CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP27]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = udiv i32 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4 +; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP15]], [[TMP19]] +; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP28]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64 +; CHECK-NEXT: [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP23]]) +; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8 +; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr addrspace(1) [[TMP36]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP37]], 
i64 24) +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33 +; CHECK-NEXT: [[TMP73:%.*]] = ptrtoint ptr addrspace(1) [[TMP53]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP73]], i64 31) +; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 68 +; CHECK-NEXT: [[TMP75:%.*]] = ptrtoint ptr addrspace(1) [[TMP74]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP75]], i64 28) +; CHECK-NEXT: br label [[TMP21]] +; CHECK: 32: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP31:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]] +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP29]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] +; CHECK-NEXT: call void @use_variables() +; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP31]], i32 [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint ptr addrspace(1) [[TMP39]] to i64 +; CHECK-NEXT: [[TMP41:%.*]] = lshr i64 [[TMP40]], 3 +; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], 2147450880 +; CHECK-NEXT: [[TMP43:%.*]] = inttoptr i64 [[TMP42]] to ptr +; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP43]], 
align 1 +; CHECK-NEXT: [[TMP45:%.*]] = icmp ne i8 [[TMP44]], 0 +; CHECK-NEXT: [[TMP46:%.*]] = and i64 [[TMP40]], 7 +; CHECK-NEXT: [[TMP47:%.*]] = trunc i64 [[TMP46]] to i8 +; CHECK-NEXT: [[TMP48:%.*]] = icmp sge i8 [[TMP47]], [[TMP44]] +; CHECK-NEXT: [[TMP49:%.*]] = and i1 [[TMP45]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP49]]) +; CHECK-NEXT: [[TMP51:%.*]] = icmp ne i64 [[TMP50]], 0 +; CHECK-NEXT: br i1 [[TMP51]], label [[ASAN_REPORT:%.*]], label [[TMP54:%.*]], !prof [[PROF3]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP49]], label [[TMP52:%.*]], label [[CONDFREE:%.*]] +; CHECK: 52: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP40]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[CONDFREE]] +; CHECK: 53: +; CHECK-NEXT: br label [[TMP54]] +; CHECK: 54: +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP39]], align 1 +; CHECK-NEXT: [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP30]] to i32 +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP31]], i32 [[TMP55]] +; CHECK-NEXT: [[TMP57:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64 +; CHECK-NEXT: [[TMP63:%.*]] = add i64 [[TMP57]], 3 +; CHECK-NEXT: [[TMP90:%.*]] = inttoptr i64 [[TMP63]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP91:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = lshr i64 [[TMP91]], 3 +; CHECK-NEXT: [[TMP59:%.*]] = add i64 [[TMP58]], 2147450880 +; CHECK-NEXT: [[TMP60:%.*]] = inttoptr i64 [[TMP59]] to ptr +; CHECK-NEXT: [[TMP61:%.*]] = load i8, ptr [[TMP60]], align 1 +; CHECK-NEXT: [[TMP62:%.*]] = icmp ne i8 [[TMP61]], 0 +; CHECK-NEXT: [[TMP64:%.*]] = and i64 [[TMP91]], 7 +; CHECK-NEXT: [[TMP65:%.*]] = trunc i64 [[TMP64]] to i8 +; CHECK-NEXT: [[TMP66:%.*]] = icmp sge i8 [[TMP65]], [[TMP61]] +; CHECK-NEXT: [[TMP67:%.*]] = and i1 [[TMP62]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP67]]) +; CHECK-NEXT: 
[[TMP69:%.*]] = icmp ne i64 [[TMP68]], 0 +; CHECK-NEXT: br i1 [[TMP69]], label [[ASAN_REPORT1:%.*]], label [[TMP72:%.*]], !prof [[PROF3]] +; CHECK: asan.report1: +; CHECK-NEXT: br i1 [[TMP67]], label [[TMP70:%.*]], label [[TMP71:%.*]] +; CHECK: 72: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP91]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP71]] +; CHECK: 73: +; CHECK-NEXT: br label [[TMP72]] +; CHECK: 74: +; CHECK-NEXT: [[TMP92:%.*]] = ptrtoint ptr addrspace(1) [[TMP90]] to i64 +; CHECK-NEXT: [[TMP76:%.*]] = lshr i64 [[TMP92]], 3 +; CHECK-NEXT: [[TMP77:%.*]] = add i64 [[TMP76]], 2147450880 +; CHECK-NEXT: [[TMP78:%.*]] = inttoptr i64 [[TMP77]] to ptr +; CHECK-NEXT: [[TMP79:%.*]] = load i8, ptr [[TMP78]], align 1 +; CHECK-NEXT: [[TMP80:%.*]] = icmp ne i8 [[TMP79]], 0 +; CHECK-NEXT: [[TMP81:%.*]] = and i64 [[TMP92]], 7 +; CHECK-NEXT: [[TMP82:%.*]] = trunc i64 [[TMP81]] to i8 +; CHECK-NEXT: [[TMP83:%.*]] = icmp sge i8 [[TMP82]], [[TMP79]] +; CHECK-NEXT: [[TMP84:%.*]] = and i1 [[TMP80]], [[TMP83]] +; CHECK-NEXT: [[TMP85:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP84]]) +; CHECK-NEXT: [[TMP86:%.*]] = icmp ne i64 [[TMP85]], 0 +; CHECK-NEXT: br i1 [[TMP86]], label [[ASAN_REPORT2:%.*]], label [[TMP89:%.*]], !prof [[PROF3]] +; CHECK: asan.report2: +; CHECK-NEXT: br i1 [[TMP84]], label [[TMP87:%.*]], label [[TMP88:%.*]] +; CHECK: 87: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP92]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP88]] +; CHECK: 88: +; CHECK-NEXT: br label [[TMP89]] +; CHECK: 89: +; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP56]], align 2 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP32:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP33:%.*]] = 
ptrtoint ptr [[TMP32]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP34]], i64 [[TMP33]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @use_variables() + store i8 7, ptr addrspace(3) @lds_1, align 1 + store i32 8, ptr addrspace(3) @lds_2, align 2 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address } +; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8,8" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR7]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 8, i32 9} +; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1} +; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[META4]] = !{i32 0} +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access.ll new file mode 100644 index 0000000000000..8cbeb80d62335 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-indirect-access.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check indirect dynamic LDS access through a non-kernel from kernel is lowered correctly. +@lds_1 = internal addrspace(3) global [1 x i8] poison, align 1 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 2 +@lds_3 = external addrspace(3) global [0 x i8], align 4 +@lds_4 = external addrspace(3) global [0 x i8], align 8 + +;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address +; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) 
@llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address +;. +define void @use_variables() sanitize_address { +; CHECK-LABEL: define void @use_variables( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]] +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP14]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP30]] +; 
CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP31]], align 8 +; CHECK-NEXT: ret void +; + store i8 3, ptr addrspace(3) @lds_3, align 4 + store i8 3, ptr addrspace(3) @lds_4, align 8 + ret void +} + +define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 7 +; 
CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 8 +; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], [[TMP14]] +; CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP27]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = udiv i32 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4 +; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP15]], [[TMP19]] +; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP28]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64 +; CHECK-NEXT: [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP23]]) +; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8 +; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr addrspace(1) [[TMP36]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP37]], i64 24) +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33 +; CHECK-NEXT: 
[[TMP73:%.*]] = ptrtoint ptr addrspace(1) [[TMP53]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP73]], i64 31) +; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 68 +; CHECK-NEXT: [[TMP75:%.*]] = ptrtoint ptr addrspace(1) [[TMP74]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP75]], i64 28) +; CHECK-NEXT: br label [[TMP21]] +; CHECK: 32: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP31:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]] +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP29]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] +; CHECK-NEXT: call void @use_variables() +; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP31]], i32 [[TMP38]] +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP39]], align 1 +; CHECK-NEXT: [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP30]] to i32 +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP31]], i32 [[TMP55]] +; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP56]], align 2 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; 
CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP32:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[TMP32]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP34]], i64 [[TMP33]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @use_variables() + store i8 7, ptr addrspace(3) @lds_1, align 1 + store i32 8, ptr addrspace(3) @lds_2, align 2 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address } +; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8,8" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 8, i32 9} +; CHECK: [[META2]] = !{i32 0} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll new file mode 100644 index 0000000000000..f33b30119754f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test-asan.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if direct access of dynamic LDS in kernel is lowered correctly. +@lds_1 = external addrspace(3) global [0 x i8] +@lds_2 = external addrspace(3) global [0 x i8] + +;. 
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol [[META1:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address +;. +define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4 +; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP20:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP20]], i64 15 +; CHECK-NEXT: store i32 [[TMP24]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(4) [[TMP18]], align 4 +; CHECK-NEXT: store i32 [[TMP13]], ptr addrspace(1) 
getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4 +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = udiv i32 [[TMP14]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 1 +; CHECK-NEXT: store i32 [[TMP16]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP24]], [[TMP16]] +; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP17]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP21]], i64 [[TMP23]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP44:%.*]] = ptrtoint ptr addrspace(1) [[TMP42]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP44]], i64 24) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 23: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP28:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP10]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] +; CHECK-NEXT: [[TMP45:%.*]] = ptrtoint ptr addrspace(3) [[TMP11]] to i32 +; CHECK-NEXT: 
[[TMP46:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP28]], i32 [[TMP45]] +; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint ptr addrspace(1) [[TMP46]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = lshr i64 [[TMP29]], 3 +; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[TMP30]], 2147450880 +; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr +; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i8 [[TMP33]], 0 +; CHECK-NEXT: [[TMP35:%.*]] = and i64 [[TMP29]], 7 +; CHECK-NEXT: [[TMP36:%.*]] = trunc i64 [[TMP35]] to i8 +; CHECK-NEXT: [[TMP37:%.*]] = icmp sge i8 [[TMP36]], [[TMP33]] +; CHECK-NEXT: [[TMP38:%.*]] = and i1 [[TMP34]], [[TMP37]] +; CHECK-NEXT: [[TMP39:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP38]]) +; CHECK-NEXT: [[TMP40:%.*]] = icmp ne i64 [[TMP39]], 0 +; CHECK-NEXT: br i1 [[TMP40]], label [[ASAN_REPORT:%.*]], label [[TMP43:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP38]], label [[TMP41:%.*]], label [[CONDFREE:%.*]] +; CHECK: 41: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP29]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[CONDFREE]] +; CHECK: 42: +; CHECK-NEXT: br label [[TMP43]] +; CHECK: 43: +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP46]], align 4 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + store i8 7, ptr addrspace(3) @lds_1, align 4 + ;store i8 8, ptr addrspace(3) @lds_2, align 8 + ret void +} + 
+!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8,8" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR6]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 8, i32 9} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test.ll new file mode 100644 index 0000000000000..5e90eb0b95219 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-dynamic-lds-test.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if direct access of dynamic LDS in kernel is lowered correctly. +@lds_1 = external addrspace(3) global [0 x i8] +@lds_2 = external addrspace(3) global [0 x i8] + +;. 
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol [[META1:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 0, i32 32 } }, no_sanitize_address +;. +define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4 +; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP20:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP20]], i64 15 +; CHECK-NEXT: store i32 [[TMP24]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(4) [[TMP18]], align 4 +; CHECK-NEXT: store i32 [[TMP13]], ptr addrspace(1) 
getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4 +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 0 +; CHECK-NEXT: [[TMP15:%.*]] = udiv i32 [[TMP14]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 1 +; CHECK-NEXT: store i32 [[TMP16]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP24]], [[TMP16]] +; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP17]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP21]], i64 [[TMP23]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP44:%.*]] = ptrtoint ptr addrspace(1) [[TMP42]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP44]], i64 24) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 23: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP28:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP10]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] +; CHECK-NEXT: [[TMP45:%.*]] = ptrtoint ptr addrspace(3) [[TMP11]] to i32 +; CHECK-NEXT: 
[[TMP46:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP28]], i32 [[TMP45]] +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP46]], align 4 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP25:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP27]], i64 [[TMP26]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + store i8 7, ptr addrspace(3) @lds_1, align 4 + ;store i8 8, ptr addrspace(3) @lds_2, align 8 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8,8" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 8, i32 9} +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access-asan.ll new file mode 100644 index 0000000000000..91e0a9fc5018b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access-asan.ll @@ -0,0 +1,385 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check that when multiple kernels access the same non-kernel function, LDS accesses are lowered correctly. +@lds_1 = internal addrspace(3) global [1 x i8] poison, align 1 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 2 +@lds_3 = external addrspace(3) global [0 x i8], align 4 +@lds_4 = external addrspace(3) global [0 x i8], align 8 + +define void @use_variables_1() sanitize_address { +; CHECK-LABEL: define void @use_variables_1( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x [4 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [4 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32
0, i32 [[TMP1]], i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP47:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = lshr i64 [[TMP47]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 2147450880 +; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr +; CHECK-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP19]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP47]], 7 +; CHECK-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i8 +; CHECK-NEXT: [[TMP23:%.*]] = icmp sge i8 [[TMP22]], [[TMP19]] +; CHECK-NEXT: [[TMP24:%.*]] = and i1 [[TMP20]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP24]]) +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i64 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[TMP26]], label [[ASAN_REPORT:%.*]], label [[TMP29:%.*]], !prof [[PROF3:![0-9]+]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP24]], label [[TMP27:%.*]], label [[TMP28:%.*]] +; CHECK: 27: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP47]]) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP28]] +; CHECK: 28: +; CHECK-NEXT: br label [[TMP29]] +; CHECK: 29: +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP14]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: [[TMP33:%.*]] = lshr i64 [[TMP32]], 3 
+; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[TMP33]], 2147450880 +; CHECK-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP34]] to ptr +; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP35]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i8 [[TMP36]], 0 +; CHECK-NEXT: [[TMP38:%.*]] = and i64 [[TMP32]], 7 +; CHECK-NEXT: [[TMP39:%.*]] = trunc i64 [[TMP38]] to i8 +; CHECK-NEXT: [[TMP40:%.*]] = icmp sge i8 [[TMP39]], [[TMP36]] +; CHECK-NEXT: [[TMP41:%.*]] = and i1 [[TMP37]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i64 [[TMP42]], 0 +; CHECK-NEXT: br i1 [[TMP43]], label [[ASAN_REPORT1:%.*]], label [[TMP46:%.*]], !prof [[PROF3]] +; CHECK: asan.report1: +; CHECK-NEXT: br i1 [[TMP41]], label [[TMP44:%.*]], label [[TMP45:%.*]] +; CHECK: 44: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP45]] +; CHECK: 45: +; CHECK-NEXT: br label [[TMP46]] +; CHECK: 46: +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP31]], align 8 +; CHECK-NEXT: ret void +; + store i8 3, ptr addrspace(3) @lds_3, align 4 + store i8 3, ptr addrspace(3) @lds_4, align 8 + ret void +} + +define void @use_variables_2() sanitize_address { +; CHECK-LABEL: define void @use_variables_2( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x [4 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 
8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [4 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP48:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = lshr i64 [[TMP48]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 2147450880 +; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr +; CHECK-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP19]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP48]], 7 +; CHECK-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i8 +; CHECK-NEXT: [[TMP23:%.*]] = icmp sge i8 [[TMP22]], [[TMP19]] +; CHECK-NEXT: [[TMP24:%.*]] = and i1 [[TMP20]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP24]]) +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i64 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[TMP26]], label [[ASAN_REPORT:%.*]], label [[TMP29:%.*]], !prof [[PROF3]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP24]], label [[TMP27:%.*]], label [[TMP28:%.*]] +; CHECK: 27: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP48]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP28]] +; CHECK: 28: +; CHECK-NEXT: br label [[TMP29]] +; CHECK: 29: +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP14]], align 1 +; 
CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: [[TMP38:%.*]] = add i64 [[TMP32]], 3 +; CHECK-NEXT: [[TMP49:%.*]] = inttoptr i64 [[TMP38]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP65:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: [[TMP33:%.*]] = lshr i64 [[TMP65]], 3 +; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[TMP33]], 2147450880 +; CHECK-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP34]] to ptr +; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP35]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i8 [[TMP36]], 0 +; CHECK-NEXT: [[TMP39:%.*]] = and i64 [[TMP65]], 7 +; CHECK-NEXT: [[TMP40:%.*]] = trunc i64 [[TMP39]] to i8 +; CHECK-NEXT: [[TMP41:%.*]] = icmp sge i8 [[TMP40]], [[TMP36]] +; CHECK-NEXT: [[TMP42:%.*]] = and i1 [[TMP37]], [[TMP41]] +; CHECK-NEXT: [[TMP43:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP42]]) +; CHECK-NEXT: [[TMP44:%.*]] = icmp ne i64 [[TMP43]], 0 +; CHECK-NEXT: br i1 [[TMP44]], label [[ASAN_REPORT1:%.*]], label [[TMP47:%.*]], !prof [[PROF3]] +; CHECK: asan.report1: +; CHECK-NEXT: br i1 [[TMP42]], label [[TMP45:%.*]], label [[TMP46:%.*]] +; CHECK: 47: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP65]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP46]] +; CHECK: 48: +; CHECK-NEXT: br label [[TMP47]] +; CHECK: 49: +; CHECK-NEXT: [[TMP50:%.*]] = ptrtoint ptr addrspace(1) [[TMP49]] to i64 +; CHECK-NEXT: [[TMP51:%.*]] = lshr i64 [[TMP50]], 3 +; CHECK-NEXT: [[TMP52:%.*]] = add i64 [[TMP51]], 2147450880 +; CHECK-NEXT: [[TMP53:%.*]] = inttoptr i64 [[TMP52]] to ptr +; CHECK-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1 +; CHECK-NEXT: [[TMP55:%.*]] = icmp ne i8 [[TMP54]], 0 +; CHECK-NEXT: [[TMP56:%.*]] = and i64 [[TMP50]], 7 +; CHECK-NEXT: [[TMP57:%.*]] = trunc i64 
[[TMP56]] to i8 +; CHECK-NEXT: [[TMP58:%.*]] = icmp sge i8 [[TMP57]], [[TMP54]] +; CHECK-NEXT: [[TMP59:%.*]] = and i1 [[TMP55]], [[TMP58]] +; CHECK-NEXT: [[TMP60:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP59]]) +; CHECK-NEXT: [[TMP61:%.*]] = icmp ne i64 [[TMP60]], 0 +; CHECK-NEXT: br i1 [[TMP61]], label [[ASAN_REPORT2:%.*]], label [[TMP64:%.*]], !prof [[PROF3]] +; CHECK: asan.report2: +; CHECK-NEXT: br i1 [[TMP59]], label [[TMP62:%.*]], label [[TMP63:%.*]] +; CHECK: 62: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP50]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP63]] +; CHECK: 63: +; CHECK-NEXT: br label [[TMP64]] +; CHECK: 64: +; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP31]], align 2 +; CHECK-NEXT: ret void +; + store i8 7, ptr addrspace(3) @lds_1, align 1 + store i32 8, ptr addrspace(3) @lds_2, align 2 + ret void +} + +define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP9]], 
[[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 1), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 7 +; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 8 +; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], [[TMP14]] +; CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP25]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP25]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = udiv i32 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4 +; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[TMP15]], [[TMP19]] +; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +; CHECK-NEXT: [[TMP28:%.*]] = call ptr 
@llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[TMP28]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP27]], i64 [[TMP33]]) +; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP24]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8 +; CHECK-NEXT: [[TMP51:%.*]] = ptrtoint ptr addrspace(1) [[TMP49]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP51]], i64 24) +; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33 +; CHECK-NEXT: [[TMP53:%.*]] = ptrtoint ptr addrspace(1) [[TMP52]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP53]], i64 31) +; CHECK-NEXT: br label [[TMP21]] +; CHECK: 30: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP29:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP22]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] +; CHECK-NEXT: call void @use_variables_1() +; CHECK-NEXT: [[TMP34:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP29]], i32 [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = ptrtoint ptr addrspace(1) [[TMP35]] to i64 +; CHECK-NEXT: [[TMP37:%.*]] = lshr i64 [[TMP36]], 3 +; CHECK-NEXT: [[TMP38:%.*]] = add i64 [[TMP37]], 2147450880 +; CHECK-NEXT: [[TMP39:%.*]] = inttoptr i64 [[TMP38]] to ptr +; CHECK-NEXT: 
[[TMP40:%.*]] = load i8, ptr [[TMP39]], align 1 +; CHECK-NEXT: [[TMP41:%.*]] = icmp ne i8 [[TMP40]], 0 +; CHECK-NEXT: [[TMP42:%.*]] = and i64 [[TMP36]], 7 +; CHECK-NEXT: [[TMP43:%.*]] = trunc i64 [[TMP42]] to i8 +; CHECK-NEXT: [[TMP44:%.*]] = icmp sge i8 [[TMP43]], [[TMP40]] +; CHECK-NEXT: [[TMP45:%.*]] = and i1 [[TMP41]], [[TMP44]] +; CHECK-NEXT: [[TMP46:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP45]]) +; CHECK-NEXT: [[TMP47:%.*]] = icmp ne i64 [[TMP46]], 0 +; CHECK-NEXT: br i1 [[TMP47]], label [[ASAN_REPORT:%.*]], label [[TMP50:%.*]], !prof [[PROF3]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP45]], label [[TMP48:%.*]], label [[CONDFREE:%.*]] +; CHECK: 48: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP36]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[CONDFREE]] +; CHECK: 49: +; CHECK-NEXT: br label [[TMP50]] +; CHECK: 50: +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP35]], align 1 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @use_variables_1() + store i8 7, ptr addrspace(3) @lds_1, align 1 + ret void +} + +define amdgpu_kernel void @k1() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k1( +; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 
@llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 3, i32 1), align 4 +; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[TMP30]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = udiv i32 [[TMP33]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 3, i32 2), align 4 +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP8]], [[TMP19]] +; CHECK-NEXT: store i32 [[TMP20]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr 
addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP21]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 4, i32 1), align 4 +; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], 7 +; CHECK-NEXT: [[TMP23:%.*]] = udiv i32 [[TMP22]], 8 +; CHECK-NEXT: [[TMP24:%.*]] = mul i32 [[TMP23]], 8 +; CHECK-NEXT: store i32 [[TMP24]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 4, i32 2), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = add i32 [[TMP20]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[TMP27]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP28]]) +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP34]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8 +; CHECK-NEXT: [[TMP53:%.*]] = ptrtoint ptr addrspace(1) [[TMP51]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP53]], i64 24) +; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33 +; CHECK-NEXT: [[TMP55:%.*]] = ptrtoint ptr addrspace(1) [[TMP54]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP55]], i64 31) +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68 +; CHECK-NEXT: [[TMP57:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP57]], i64 28) +; CHECK-NEXT: br label [[TMP14]] +; CHECK: 32: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP29:%.*]] = load 
ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8 +; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, i32 [[TMP31]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k1.dynlds) ] +; CHECK-NEXT: call void @use_variables_1() +; CHECK-NEXT: call void @use_variables_2() +; CHECK-NEXT: [[TMP58:%.*]] = ptrtoint ptr addrspace(3) [[TMP32]] to i32 +; CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP29]], i32 [[TMP58]] +; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr addrspace(1) [[TMP59]] to i64 +; CHECK-NEXT: [[TMP39:%.*]] = lshr i64 [[TMP38]], 3 +; CHECK-NEXT: [[TMP40:%.*]] = add i64 [[TMP39]], 2147450880 +; CHECK-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr +; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[TMP41]], align 1 +; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i8 [[TMP42]], 0 +; CHECK-NEXT: [[TMP44:%.*]] = and i64 [[TMP38]], 7 +; CHECK-NEXT: [[TMP45:%.*]] = trunc i64 [[TMP44]] to i8 +; CHECK-NEXT: [[TMP46:%.*]] = icmp sge i8 [[TMP45]], [[TMP42]] +; CHECK-NEXT: [[TMP47:%.*]] = and i1 [[TMP43]], [[TMP46]] +; CHECK-NEXT: [[TMP48:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP47]]) +; CHECK-NEXT: [[TMP49:%.*]] = icmp ne i64 [[TMP48]], 0 +; CHECK-NEXT: br i1 [[TMP49]], label [[ASAN_REPORT:%.*]], label [[TMP52:%.*]], !prof [[PROF3]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP47]], label [[TMP50:%.*]], label [[CONDFREE:%.*]] +; CHECK: 50: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP38]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[CONDFREE]] +; CHECK: 51: +; CHECK-NEXT: br label [[TMP52]] +; CHECK: 52: +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP59]], align 4 +; CHECK-NEXT: br label 
[[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP35:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP36:%.*]] = ptrtoint ptr [[TMP35]] to i64 +; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP37]], i64 [[TMP36]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @use_variables_1() + call void @use_variables_2() + store i8 3, ptr addrspace(3) @lds_3, align 4 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[META4]] = !{i32 0} +; CHECK: [[META5]] = !{i32 1} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access.ll new file mode 100644 index 0000000000000..d0caddb7934a7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multi-static-dynamic-indirect-access.ll @@ -0,0 +1,227 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check that when multiple kernels access the same non-kernel function, LDS accesses are lowered correctly.
+@lds_1 = internal addrspace(3) global [1 x i8] poison, align 1 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 2 +@lds_3 = external addrspace(3) global [0 x i8], align 4 +@lds_4 = external addrspace(3) global [0 x i8], align 8 + +define void @use_variables_1() sanitize_address { +; CHECK-LABEL: define void @use_variables_1( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x [4 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [4 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 3 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]] +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP14]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr 
addrspace(1) [[TMP7]], i32 [[TMP30]] +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP31]], align 8 +; CHECK-NEXT: ret void +; + store i8 3, ptr addrspace(3) @lds_3, align 4 + store i8 3, ptr addrspace(3) @lds_4, align 8 + ret void +} + +define void @use_variables_2() sanitize_address { +; CHECK-LABEL: define void @use_variables_2( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x [4 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x [4 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]] +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP14]], align 1 +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], 
i32 [[TMP30]] +; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP31]], align 2 +; CHECK-NEXT: ret void +; + store i8 7, ptr addrspace(3) @lds_1, align 1 + store i32 8, ptr addrspace(3) @lds_2, align 2 + ret void +} + +define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 1), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 
[[TMP11]], 7 +; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 8 +; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], [[TMP14]] +; CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP25]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP25]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = udiv i32 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4 +; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[TMP15]], [[TMP19]] +; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +; CHECK-NEXT: [[TMP28:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[TMP28]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP27]], i64 [[TMP33]]) +; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP24]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8 +; CHECK-NEXT: [[TMP51:%.*]] = ptrtoint ptr addrspace(1) [[TMP49]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP51]], i64 24) +; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33 +; 
CHECK-NEXT: [[TMP53:%.*]] = ptrtoint ptr addrspace(1) [[TMP52]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP53]], i64 31) +; CHECK-NEXT: br label [[TMP21]] +; CHECK: 30: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP29:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP22]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] +; CHECK-NEXT: call void @use_variables_1() +; CHECK-NEXT: [[TMP34:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP29]], i32 [[TMP34]] +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP35]], align 1 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @use_variables_1() + store i8 7, ptr addrspace(3) @lds_1, align 1 + ret void +} + +define amdgpu_kernel void @k1() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k1( +; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 
@llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 3, i32 1), align 4 +; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[TMP30]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = udiv i32 [[TMP33]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 3, i32 2), align 4 +; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP8]], [[TMP19]] +; CHECK-NEXT: store i32 [[TMP20]], ptr addrspace(1) getelementptr inbounds 
([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP21]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 4, i32 1), align 4 +; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP21]], 7 +; CHECK-NEXT: [[TMP23:%.*]] = udiv i32 [[TMP22]], 8 +; CHECK-NEXT: [[TMP24:%.*]] = mul i32 [[TMP23]], 8 +; CHECK-NEXT: store i32 [[TMP24]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 4, i32 2), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = add i32 [[TMP20]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[TMP27]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP28]]) +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP34]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8 +; CHECK-NEXT: [[TMP53:%.*]] = ptrtoint ptr addrspace(1) [[TMP51]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP53]], i64 24) +; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 33 +; CHECK-NEXT: [[TMP55:%.*]] = ptrtoint ptr addrspace(1) [[TMP54]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP55]], i64 31) +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68 +; CHECK-NEXT: [[TMP57:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP57]], i64 28) +; CHECK-NEXT: br label [[TMP14]] +; CHECK: 32: +; CHECK-NEXT: 
[[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP29:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, align 8 +; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k1.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k1, i32 [[TMP31]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k1.dynlds) ] +; CHECK-NEXT: call void @use_variables_1() +; CHECK-NEXT: call void @use_variables_2() +; CHECK-NEXT: [[TMP58:%.*]] = ptrtoint ptr addrspace(3) [[TMP32]] to i32 +; CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP29]], i32 [[TMP58]] +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP59]], align 4 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP35:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP36:%.*]] = ptrtoint ptr [[TMP35]] to i64 +; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP37]], i64 [[TMP36]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @use_variables_1() + call void @use_variables_2() + store i8 3, ptr addrspace(3) @lds_3, align 4 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: [[META2]] = !{i32 0} +; CHECK: [[META3]] = !{i32 1} +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return-asan.ll new file mode 100644 index 0000000000000..07baf90e370d1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return-asan.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check that malloc and free blocks are placed correctly when multiple +; blocks and branching are present in the function, with LDS accesses lowered correctly. + +@lds_1 = internal addrspace(3) global i32 poison +@lds_2 = internal addrspace(3) global i32 poison + +;. +; CHECK: @llvm.amdgcn.sw.lds.test_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.test_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.test_kernel.md.type { %llvm.amdgcn.sw.lds.test_kernel.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.test_kernel.md.item { i32 32, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.test_kernel.md.item { i32 64, i32 4, i32 32 } }, no_sanitize_address +;. 
+define amdgpu_kernel void @test_kernel() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @test_kernel( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP18]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[TMP14]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP17]], i64 [[TMP19]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP20]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.test_kernel, align 8 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(1) [[TMP27]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP28]], i64 24) +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 36 +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64 +; CHECK-NEXT: call void 
@__asan_poison_region(i64 [[TMP30]], i64 28) +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 68 +; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP32]], i64 28) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 20: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.test_kernel, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.test_kernel, i32 [[TMP10]] +; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.test_kernel, i32 [[TMP25]] +; CHECK-NEXT: [[TMP12:%.*]] = addrspacecast ptr addrspace(3) [[TMP11]] to ptr addrspace(1) +; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = addrspacecast ptr addrspace(3) [[TMP26]] to ptr addrspace(1) +; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(1) [[TMP13]], align 4 +; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[RESULT]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[POSITIVE:%.*]], label [[NEGATIVE:%.*]] +; CHECK: positive: +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: negative: +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[VAL1]], 0 +; CHECK-NEXT: br i1 [[CMP2]], label [[VAL1_POSITIVE:%.*]], label [[VAL1_NEGATIVE:%.*]] +; CHECK: 
val1_positive: +; CHECK-NEXT: br label [[CONDFREE]] +; CHECK: val1_negative: +; CHECK-NEXT: br label [[CONDFREE]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP24]], i64 [[TMP23]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; +%val1 = load i32, ptr addrspace(1) addrspacecast (ptr addrspace(3) @lds_1 to ptr addrspace(1)) +%val2 = load i32, ptr addrspace(1) addrspacecast (ptr addrspace(3) @lds_2 to ptr addrspace(1)) + +%result = add i32 %val1, %val2 +%cmp = icmp sgt i32 %result, 0 +br i1 %cmp, label %positive, label %negative + +positive: +ret void + +negative: +%cmp2 = icmp sgt i32 %val1, 0 +br i1 %cmp2, label %val1_positive, label %val1_negative + +val1_positive: +ret void + +val1_negative: +ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return.ll new file mode 100644 index 0000000000000..6848e2c06c1e1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-multiple-blocks-return.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check that malloc and free blocks are placed correctly when multiple +; blocks and branching are present in the function, with LDS accesses lowered correctly. + +@lds_1 = internal addrspace(3) global i32 poison +@lds_2 = internal addrspace(3) global i32 poison + +;. +; CHECK: @llvm.amdgcn.sw.lds.test_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.test_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.test_kernel.md.type { %llvm.amdgcn.sw.lds.test_kernel.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.test_kernel.md.item { i32 32, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.test_kernel.md.item { i32 64, i32 4, i32 32 } }, no_sanitize_address +;. 
+define amdgpu_kernel void @test_kernel() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @test_kernel( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP15]], [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP18]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[TMP14]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP17]], i64 [[TMP19]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP20]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.test_kernel, align 8 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(1) [[TMP27]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP28]], i64 24) +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 36 +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64 +; CHECK-NEXT: call void 
@__asan_poison_region(i64 [[TMP30]], i64 28) +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 68 +; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP32]], i64 28) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 20: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.test_kernel, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.test_kernel, i32 [[TMP10]] +; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_TEST_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.test_kernel.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.test_kernel, i32 [[TMP25]] +; CHECK-NEXT: [[TMP12:%.*]] = addrspacecast ptr addrspace(3) [[TMP11]] to ptr addrspace(1) +; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = addrspacecast ptr addrspace(3) [[TMP26]] to ptr addrspace(1) +; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(1) [[TMP13]], align 4 +; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[VAL1]], [[VAL2]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[RESULT]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[POSITIVE:%.*]], label [[NEGATIVE:%.*]] +; CHECK: positive: +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: negative: +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[VAL1]], 0 +; CHECK-NEXT: br i1 [[CMP2]], label [[VAL1_POSITIVE:%.*]], label [[VAL1_NEGATIVE:%.*]] +; CHECK: 
val1_positive: +; CHECK-NEXT: br label [[CONDFREE]] +; CHECK: val1_negative: +; CHECK-NEXT: br label [[CONDFREE]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP24]], i64 [[TMP23]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; +%val1 = load i32, ptr addrspace(1) addrspacecast (ptr addrspace(3) @lds_1 to ptr addrspace(1)) +%val2 = load i32, ptr addrspace(1) addrspacecast (ptr addrspace(3) @lds_2 to ptr addrspace(1)) + +%result = add i32 %val1, %val2 +%cmp = icmp sgt i32 %result, 0 +br i1 %cmp, label %positive, label %negative + +positive: +ret void + +negative: +%cmp2 = icmp sgt i32 %val1, 0 +br i1 %cmp2, label %val1_positive, label %val1_negative + +val1_positive: +ret void + +val1_negative: +ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll new file mode 100644 index 0000000000000..40b1305a3b12c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access-asan.ll @@ -0,0 +1,261 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if static and dynamic LDS accesses are lowered correctly when a non-kernel +; is called from kernel. +@lds_1 = internal addrspace(3) global [1 x i8] poison, align 1 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 2 +@lds_3 = external addrspace(3) global [0 x i8], align 4 +@lds_4 = external addrspace(3) global [0 x i8], align 8 + +;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address +; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) 
@llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address +;. +define void @use_variables() sanitize_address { +; CHECK-LABEL: define void @use_variables( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]] +; CHECK-NEXT: [[TMP47:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = lshr i64 [[TMP47]], 3 +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP16]], 2147450880 +; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to 
ptr +; CHECK-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP19]], 0 +; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP47]], 7 +; CHECK-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i8 +; CHECK-NEXT: [[TMP23:%.*]] = icmp sge i8 [[TMP22]], [[TMP19]] +; CHECK-NEXT: [[TMP24:%.*]] = and i1 [[TMP20]], [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP24]]) +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i64 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[TMP26]], label [[ASAN_REPORT:%.*]], label [[TMP29:%.*]], !prof [[PROF3:![0-9]+]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP24]], label [[TMP27:%.*]], label [[TMP28:%.*]] +; CHECK: 27: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP47]]) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP28]] +; CHECK: 28: +; CHECK-NEXT: br label [[TMP29]] +; CHECK: 29: +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP14]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: [[TMP33:%.*]] = lshr i64 [[TMP32]], 3 +; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[TMP33]], 2147450880 +; CHECK-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP34]] to ptr +; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP35]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i8 [[TMP36]], 0 +; CHECK-NEXT: [[TMP38:%.*]] = and i64 [[TMP32]], 7 +; CHECK-NEXT: [[TMP39:%.*]] = trunc i64 [[TMP38]] to i8 +; CHECK-NEXT: [[TMP40:%.*]] = icmp sge i8 [[TMP39]], [[TMP36]] +; CHECK-NEXT: [[TMP41:%.*]] = and i1 [[TMP37]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i64 [[TMP42]], 0 +; CHECK-NEXT: br i1 [[TMP43]], label [[ASAN_REPORT1:%.*]], label [[TMP46:%.*]], !prof [[PROF3]] +; 
CHECK: asan.report1: +; CHECK-NEXT: br i1 [[TMP41]], label [[TMP44:%.*]], label [[TMP45:%.*]] +; CHECK: 44: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP45]] +; CHECK: 45: +; CHECK-NEXT: br label [[TMP46]] +; CHECK: 46: +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP31]], align 8 +; CHECK-NEXT: ret void +; + store i8 3, ptr addrspace(3) @lds_3, align 4 + store i8 3, ptr addrspace(3) @lds_4, align 8 + ret void +} + +define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), 
align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 7 +; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 8 +; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], [[TMP14]] +; CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP27]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = udiv i32 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4 +; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP15]], [[TMP19]] +; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP28]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64 +; CHECK-NEXT: [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP23]]) +; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: 
[[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8 +; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr addrspace(1) [[TMP36]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP37]], i64 24) +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33 +; CHECK-NEXT: [[TMP73:%.*]] = ptrtoint ptr addrspace(1) [[TMP53]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP73]], i64 31) +; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 68 +; CHECK-NEXT: [[TMP75:%.*]] = ptrtoint ptr addrspace(1) [[TMP74]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP75]], i64 28) +; CHECK-NEXT: br label [[TMP21]] +; CHECK: 32: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP31:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]] +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP29]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] +; CHECK-NEXT: call void @use_variables() +; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP31]], i32 [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint ptr addrspace(1) [[TMP39]] to i64 +; CHECK-NEXT: 
[[TMP41:%.*]] = lshr i64 [[TMP40]], 3 +; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], 2147450880 +; CHECK-NEXT: [[TMP43:%.*]] = inttoptr i64 [[TMP42]] to ptr +; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP43]], align 1 +; CHECK-NEXT: [[TMP45:%.*]] = icmp ne i8 [[TMP44]], 0 +; CHECK-NEXT: [[TMP46:%.*]] = and i64 [[TMP40]], 7 +; CHECK-NEXT: [[TMP47:%.*]] = trunc i64 [[TMP46]] to i8 +; CHECK-NEXT: [[TMP48:%.*]] = icmp sge i8 [[TMP47]], [[TMP44]] +; CHECK-NEXT: [[TMP49:%.*]] = and i1 [[TMP45]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP49]]) +; CHECK-NEXT: [[TMP51:%.*]] = icmp ne i64 [[TMP50]], 0 +; CHECK-NEXT: br i1 [[TMP51]], label [[ASAN_REPORT:%.*]], label [[TMP54:%.*]], !prof [[PROF3]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP49]], label [[TMP52:%.*]], label [[CONDFREE:%.*]] +; CHECK: 52: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP40]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[CONDFREE]] +; CHECK: 53: +; CHECK-NEXT: br label [[TMP54]] +; CHECK: 54: +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP39]], align 1 +; CHECK-NEXT: [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP30]] to i32 +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP31]], i32 [[TMP55]] +; CHECK-NEXT: [[TMP57:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64 +; CHECK-NEXT: [[TMP63:%.*]] = add i64 [[TMP57]], 3 +; CHECK-NEXT: [[TMP90:%.*]] = inttoptr i64 [[TMP63]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP91:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = lshr i64 [[TMP91]], 3 +; CHECK-NEXT: [[TMP59:%.*]] = add i64 [[TMP58]], 2147450880 +; CHECK-NEXT: [[TMP60:%.*]] = inttoptr i64 [[TMP59]] to ptr +; CHECK-NEXT: [[TMP61:%.*]] = load i8, ptr [[TMP60]], align 1 +; CHECK-NEXT: [[TMP62:%.*]] = icmp ne i8 [[TMP61]], 0 +; CHECK-NEXT: [[TMP64:%.*]] = and i64 [[TMP91]], 7 +; CHECK-NEXT: [[TMP65:%.*]] = trunc i64 [[TMP64]] to i8 +; 
CHECK-NEXT: [[TMP66:%.*]] = icmp sge i8 [[TMP65]], [[TMP61]] +; CHECK-NEXT: [[TMP67:%.*]] = and i1 [[TMP62]], [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP67]]) +; CHECK-NEXT: [[TMP69:%.*]] = icmp ne i64 [[TMP68]], 0 +; CHECK-NEXT: br i1 [[TMP69]], label [[ASAN_REPORT1:%.*]], label [[TMP72:%.*]], !prof [[PROF3]] +; CHECK: asan.report1: +; CHECK-NEXT: br i1 [[TMP67]], label [[TMP70:%.*]], label [[TMP71:%.*]] +; CHECK: 72: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP91]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP71]] +; CHECK: 73: +; CHECK-NEXT: br label [[TMP72]] +; CHECK: 74: +; CHECK-NEXT: [[TMP92:%.*]] = ptrtoint ptr addrspace(1) [[TMP90]] to i64 +; CHECK-NEXT: [[TMP76:%.*]] = lshr i64 [[TMP92]], 3 +; CHECK-NEXT: [[TMP77:%.*]] = add i64 [[TMP76]], 2147450880 +; CHECK-NEXT: [[TMP78:%.*]] = inttoptr i64 [[TMP77]] to ptr +; CHECK-NEXT: [[TMP79:%.*]] = load i8, ptr [[TMP78]], align 1 +; CHECK-NEXT: [[TMP80:%.*]] = icmp ne i8 [[TMP79]], 0 +; CHECK-NEXT: [[TMP81:%.*]] = and i64 [[TMP92]], 7 +; CHECK-NEXT: [[TMP82:%.*]] = trunc i64 [[TMP81]] to i8 +; CHECK-NEXT: [[TMP83:%.*]] = icmp sge i8 [[TMP82]], [[TMP79]] +; CHECK-NEXT: [[TMP84:%.*]] = and i1 [[TMP80]], [[TMP83]] +; CHECK-NEXT: [[TMP85:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP84]]) +; CHECK-NEXT: [[TMP86:%.*]] = icmp ne i64 [[TMP85]], 0 +; CHECK-NEXT: br i1 [[TMP86]], label [[ASAN_REPORT2:%.*]], label [[TMP89:%.*]], !prof [[PROF3]] +; CHECK: asan.report2: +; CHECK-NEXT: br i1 [[TMP84]], label [[TMP87:%.*]], label [[TMP88:%.*]] +; CHECK: 87: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP92]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP88]] +; CHECK: 88: +; CHECK-NEXT: br label [[TMP89]] +; CHECK: 89: +; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP56]], align 2 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void 
@llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP32:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[TMP32]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP34]], i64 [[TMP33]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @use_variables() + store i8 7, ptr addrspace(3) @lds_1, align 1 + store i32 8, ptr addrspace(3) @lds_2, align 2 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address } +; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8,8" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR7]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 8, i32 9} +; CHECK: [[META2:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1} +; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[META4]] = !{i32 0} +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access.ll new file mode 100644 index 0000000000000..0cc49c94e2279 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-indirect-access.ll @@ -0,0 +1,143 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if static and dynamic LDS accesses are lowered correctly when a non-kernel +; function is called from a kernel. +@lds_1 = internal addrspace(3) global [1 x i8] poison, align 1 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 2 +@lds_3 = external addrspace(3) global [0 x i8], align 4 +@lds_4 = external addrspace(3) global [0 x i8], align 8 + +;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address +; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, 
ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address +;. +define void @use_variables() sanitize_address { +; CHECK-LABEL: define void @use_variables( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]] +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP14]], align 4 +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 
[[TMP30]] +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP31]], align 8 +; CHECK-NEXT: ret void +; + store i8 3, ptr addrspace(3) @lds_3, align 4 + store i8 3, ptr addrspace(3) @lds_4, align 8 + ret void +} + +define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 
7 +; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 8 +; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], [[TMP14]] +; CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP27]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = udiv i32 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4 +; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP15]], [[TMP19]] +; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP28]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64 +; CHECK-NEXT: [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP26]], i64 [[TMP23]]) +; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8 +; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr addrspace(1) [[TMP36]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP37]], i64 24) +; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33 +; CHECK-NEXT: 
[[TMP73:%.*]] = ptrtoint ptr addrspace(1) [[TMP53]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP73]], i64 31) +; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 68 +; CHECK-NEXT: [[TMP75:%.*]] = ptrtoint ptr addrspace(1) [[TMP74]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP75]], i64 28) +; CHECK-NEXT: br label [[TMP21]] +; CHECK: 32: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP31:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]] +; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP29]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] +; CHECK-NEXT: call void @use_variables() +; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP31]], i32 [[TMP38]] +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP39]], align 1 +; CHECK-NEXT: [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP30]] to i32 +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP31]], i32 [[TMP55]] +; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP56]], align 2 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; 
CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP32:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[TMP32]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP34]], i64 [[TMP33]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @use_variables() + store i8 7, ptr addrspace(3) @lds_1, align 1 + store i32 8, ptr addrspace(3) @lds_2, align 2 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address } +; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8,8" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 8, i32 9} +; CHECK: [[META2]] = !{i32 0} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll new file mode 100644 index 0000000000000..f2cdc4c812db1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test-asan.ll @@ -0,0 +1,213 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if static and dynamic LDS accesses are lowered correctly in kernel. 
+@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8 +@lds_3 = external addrspace(3) global [0 x i8], align 4 +@lds_4 = external addrspace(3) global [0 x i8], align 8 + +;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address +;. +define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP9]], 
[[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 7 +; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 8 +; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], [[TMP14]] +; CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP31]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP31]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = udiv i32 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4 +; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP15]], [[TMP19]] +; CHECK-NEXT: [[TMP30:%.*]] = zext i32 [[TMP32]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = call ptr 
@llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64 +; CHECK-NEXT: [[TMP39:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP30]], i64 [[TMP23]]) +; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP39]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8 +; CHECK-NEXT: [[TMP41:%.*]] = ptrtoint ptr addrspace(1) [[TMP40]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP41]], i64 24) +; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33 +; CHECK-NEXT: [[TMP111:%.*]] = ptrtoint ptr addrspace(1) [[TMP57]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP111]], i64 31) +; CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 68 +; CHECK-NEXT: [[TMP113:%.*]] = ptrtoint ptr addrspace(1) [[TMP112]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP113]], i64 28) +; CHECK-NEXT: br label [[TMP21]] +; CHECK: 32: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP35:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = load 
i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP28]] +; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP33]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] +; CHECK-NEXT: [[TMP42:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP35]], i32 [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = ptrtoint ptr addrspace(1) [[TMP43]] to i64 +; CHECK-NEXT: [[TMP45:%.*]] = lshr i64 [[TMP44]], 3 +; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[TMP45]], 2147450880 +; CHECK-NEXT: [[TMP47:%.*]] = inttoptr i64 [[TMP46]] to ptr +; CHECK-NEXT: [[TMP48:%.*]] = load i8, ptr [[TMP47]], align 1 +; CHECK-NEXT: [[TMP49:%.*]] = icmp ne i8 [[TMP48]], 0 +; CHECK-NEXT: [[TMP50:%.*]] = and i64 [[TMP44]], 7 +; CHECK-NEXT: [[TMP51:%.*]] = trunc i64 [[TMP50]] to i8 +; CHECK-NEXT: [[TMP52:%.*]] = icmp sge i8 [[TMP51]], [[TMP48]] +; CHECK-NEXT: [[TMP53:%.*]] = and i1 [[TMP49]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP53]]) +; CHECK-NEXT: [[TMP55:%.*]] = icmp ne i64 [[TMP54]], 0 +; CHECK-NEXT: br i1 [[TMP55]], label [[ASAN_REPORT:%.*]], label [[TMP58:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP53]], label [[TMP56:%.*]], label [[CONDFREE:%.*]] +; CHECK: 56: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP44]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[CONDFREE]] +; CHECK: 57: +; CHECK-NEXT: br 
label [[TMP58]] +; CHECK: 58: +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP43]], align 4 +; CHECK-NEXT: [[TMP59:%.*]] = ptrtoint ptr addrspace(3) [[TMP27]] to i32 +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP35]], i32 [[TMP59]] +; CHECK-NEXT: [[TMP61:%.*]] = ptrtoint ptr addrspace(1) [[TMP60]] to i64 +; CHECK-NEXT: [[TMP62:%.*]] = lshr i64 [[TMP61]], 3 +; CHECK-NEXT: [[TMP63:%.*]] = add i64 [[TMP62]], 2147450880 +; CHECK-NEXT: [[TMP64:%.*]] = inttoptr i64 [[TMP63]] to ptr +; CHECK-NEXT: [[TMP65:%.*]] = load i8, ptr [[TMP64]], align 1 +; CHECK-NEXT: [[TMP66:%.*]] = icmp ne i8 [[TMP65]], 0 +; CHECK-NEXT: [[TMP67:%.*]] = and i64 [[TMP61]], 7 +; CHECK-NEXT: [[TMP68:%.*]] = add i64 [[TMP67]], 3 +; CHECK-NEXT: [[TMP69:%.*]] = trunc i64 [[TMP68]] to i8 +; CHECK-NEXT: [[TMP70:%.*]] = icmp sge i8 [[TMP69]], [[TMP65]] +; CHECK-NEXT: [[TMP71:%.*]] = and i1 [[TMP66]], [[TMP70]] +; CHECK-NEXT: [[TMP72:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP71]]) +; CHECK-NEXT: [[TMP73:%.*]] = icmp ne i64 [[TMP72]], 0 +; CHECK-NEXT: br i1 [[TMP73]], label [[ASAN_REPORT1:%.*]], label [[TMP76:%.*]], !prof [[PROF2]] +; CHECK: asan.report1: +; CHECK-NEXT: br i1 [[TMP71]], label [[TMP74:%.*]], label [[TMP75:%.*]] +; CHECK: 74: +; CHECK-NEXT: call void @__asan_report_store4(i64 [[TMP61]]) #[[ATTR6]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP75]] +; CHECK: 75: +; CHECK-NEXT: br label [[TMP76]] +; CHECK: 76: +; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP60]], align 8 +; CHECK-NEXT: [[TMP77:%.*]] = ptrtoint ptr addrspace(3) [[TMP29]] to i32 +; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP35]], i32 [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = ptrtoint ptr addrspace(1) [[TMP78]] to i64 +; CHECK-NEXT: [[TMP80:%.*]] = lshr i64 [[TMP79]], 3 +; CHECK-NEXT: [[TMP81:%.*]] = add i64 [[TMP80]], 2147450880 +; CHECK-NEXT: [[TMP82:%.*]] = inttoptr i64 [[TMP81]] to ptr +; CHECK-NEXT: [[TMP83:%.*]] = 
load i8, ptr [[TMP82]], align 1 +; CHECK-NEXT: [[TMP84:%.*]] = icmp ne i8 [[TMP83]], 0 +; CHECK-NEXT: [[TMP85:%.*]] = and i64 [[TMP79]], 7 +; CHECK-NEXT: [[TMP86:%.*]] = trunc i64 [[TMP85]] to i8 +; CHECK-NEXT: [[TMP87:%.*]] = icmp sge i8 [[TMP86]], [[TMP83]] +; CHECK-NEXT: [[TMP88:%.*]] = and i1 [[TMP84]], [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP88]]) +; CHECK-NEXT: [[TMP90:%.*]] = icmp ne i64 [[TMP89]], 0 +; CHECK-NEXT: br i1 [[TMP90]], label [[ASAN_REPORT2:%.*]], label [[TMP93:%.*]], !prof [[PROF2]] +; CHECK: asan.report2: +; CHECK-NEXT: br i1 [[TMP88]], label [[TMP91:%.*]], label [[TMP92:%.*]] +; CHECK: 91: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP79]]) #[[ATTR6]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP92]] +; CHECK: 92: +; CHECK-NEXT: br label [[TMP93]] +; CHECK: 93: +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP78]], align 4 +; CHECK-NEXT: [[TMP94:%.*]] = ptrtoint ptr addrspace(3) [[TMP34]] to i32 +; CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP35]], i32 [[TMP94]] +; CHECK-NEXT: [[TMP96:%.*]] = ptrtoint ptr addrspace(1) [[TMP95]] to i64 +; CHECK-NEXT: [[TMP97:%.*]] = lshr i64 [[TMP96]], 3 +; CHECK-NEXT: [[TMP98:%.*]] = add i64 [[TMP97]], 2147450880 +; CHECK-NEXT: [[TMP99:%.*]] = inttoptr i64 [[TMP98]] to ptr +; CHECK-NEXT: [[TMP100:%.*]] = load i8, ptr [[TMP99]], align 1 +; CHECK-NEXT: [[TMP101:%.*]] = icmp ne i8 [[TMP100]], 0 +; CHECK-NEXT: [[TMP102:%.*]] = and i64 [[TMP96]], 7 +; CHECK-NEXT: [[TMP103:%.*]] = trunc i64 [[TMP102]] to i8 +; CHECK-NEXT: [[TMP104:%.*]] = icmp sge i8 [[TMP103]], [[TMP100]] +; CHECK-NEXT: [[TMP105:%.*]] = and i1 [[TMP101]], [[TMP104]] +; CHECK-NEXT: [[TMP106:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP105]]) +; CHECK-NEXT: [[TMP107:%.*]] = icmp ne i64 [[TMP106]], 0 +; CHECK-NEXT: br i1 [[TMP107]], label [[ASAN_REPORT3:%.*]], label [[TMP110:%.*]], !prof [[PROF2]] +; CHECK: asan.report3: +; 
CHECK-NEXT: br i1 [[TMP105]], label [[TMP108:%.*]], label [[TMP109:%.*]] +; CHECK: 108: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP96]]) #[[ATTR6]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP109]] +; CHECK: 109: +; CHECK-NEXT: br label [[TMP110]] +; CHECK: 110: +; CHECK-NEXT: store i8 8, ptr addrspace(1) [[TMP95]], align 8 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP36:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[TMP36]] to i64 +; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr addrspace(1) [[TMP35]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP38]], i64 [[TMP37]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + store i8 7, ptr addrspace(3) @lds_1, align 4 + store i32 8, ptr addrspace(3) @lds_2, align 8 + store i8 7, ptr addrspace(3) @lds_3, align 4 + store i8 8, ptr addrspace(3) @lds_4, align 8 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8,8" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR6]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 8, i32 9} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test.ll new file mode 100644 index 0000000000000..e0bfca0f63ca7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-dynamic-lds-test.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if static and dynamic LDS accesses are lowered correctly in kernel. +@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8 +@lds_3 = external addrspace(3) global [0 x i8], align 4 +@lds_4 = external addrspace(3) global [0 x i8], align 8 + +;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 0, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 0, i32 32 } }, no_sanitize_address +;. 
+define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP21:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP9]], [[TMP7]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 7 +; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], 8 +; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 8 +; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) 
@llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP8]], [[TMP14]] +; CHECK-NEXT: store i32 [[TMP15]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP31]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP31]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = udiv i32 [[TMP17]], 8 +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 8 +; CHECK-NEXT: store i32 [[TMP19]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4 +; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP15]], [[TMP19]] +; CHECK-NEXT: [[TMP30:%.*]] = zext i32 [[TMP32]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64 +; CHECK-NEXT: [[TMP39:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP30]], i64 [[TMP23]]) +; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP39]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP20]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 8 +; CHECK-NEXT: [[TMP41:%.*]] = ptrtoint ptr addrspace(1) [[TMP40]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP41]], i64 24) +; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 33 +; CHECK-NEXT: [[TMP111:%.*]] = ptrtoint ptr addrspace(1) [[TMP57]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP111]], i64 31) +; CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP20]], i64 68 +; CHECK-NEXT: 
[[TMP113:%.*]] = ptrtoint ptr addrspace(1) [[TMP112]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP113]], i64 28) +; CHECK-NEXT: br label [[TMP21]] +; CHECK: 32: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP35:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP28]] +; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP33]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] +; CHECK-NEXT: [[TMP42:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP35]], i32 [[TMP42]] +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP43]], align 4 +; CHECK-NEXT: 
[[TMP59:%.*]] = ptrtoint ptr addrspace(3) [[TMP27]] to i32 +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP35]], i32 [[TMP59]] +; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP60]], align 8 +; CHECK-NEXT: [[TMP77:%.*]] = ptrtoint ptr addrspace(3) [[TMP29]] to i32 +; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP35]], i32 [[TMP77]] +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP78]], align 4 +; CHECK-NEXT: [[TMP94:%.*]] = ptrtoint ptr addrspace(3) [[TMP34]] to i32 +; CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP35]], i32 [[TMP94]] +; CHECK-NEXT: store i8 8, ptr addrspace(1) [[TMP95]], align 8 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP36:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[TMP36]] to i64 +; CHECK-NEXT: [[TMP38:%.*]] = ptrtoint ptr addrspace(1) [[TMP35]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP38]], i64 [[TMP37]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + store i8 7, ptr addrspace(3) @lds_1, align 4 + store i32 8, ptr addrspace(3) @lds_2, align 8 + store i8 7, ptr addrspace(3) @lds_3, align 4 + store i8 8, ptr addrspace(3) @lds_4, align 8 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8,8" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +;. 
+; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 8, i32 9} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll new file mode 100644 index 0000000000000..3a05f93df35a3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll @@ -0,0 +1,224 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if static LDS is lowered correctly when a non-kernel with LDS accesses is called from kernel. +@lds_1 = internal addrspace(3) global [1 x i8] poison, align 1 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 2 +@lds_3 = external addrspace(3) global [3 x i8], align 4 +@lds_4 = external addrspace(3) global [4 x i8], align 8 + +;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 3, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 4, i32 32 } }, no_sanitize_address +; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds 
(%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address +;. +define void @use_variables() sanitize_address { +; CHECK-LABEL: define void @use_variables( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]] +; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr +; CHECK-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr +; CHECK-NEXT: store i8 3, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP14]] +; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = lshr i64 [[TMP32]], 3 +; 
CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 2147450880 +; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr +; CHECK-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i8 [[TMP20]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP32]], 7 +; CHECK-NEXT: [[TMP23:%.*]] = trunc i64 [[TMP22]] to i8 +; CHECK-NEXT: [[TMP24:%.*]] = icmp sge i8 [[TMP23]], [[TMP20]] +; CHECK-NEXT: [[TMP25:%.*]] = and i1 [[TMP21]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP25]]) +; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP26]], 0 +; CHECK-NEXT: br i1 [[TMP27]], label [[ASAN_REPORT:%.*]], label [[TMP30:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP25]], label [[TMP28:%.*]], label [[TMP29:%.*]] +; CHECK: 28: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP29]] +; CHECK: 29: +; CHECK-NEXT: br label [[TMP30]] +; CHECK: 30: +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP31]], align 8 +; CHECK-NEXT: ret void +; + %X = addrspacecast ptr addrspace(3) @lds_3 to ptr + store i8 3, ptr addrspacecast( ptr addrspace(3) @lds_3 to ptr), align 4 + store i8 3, ptr addrspace(3) @lds_4, align 8 + ret void +} + +define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: 
[[TMP13:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP16]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP15]], i64 [[TMP24]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP26]], i64 24) +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 33 +; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(1) [[TMP27]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP28]], i64 31) +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 68 +; CHECK-NEXT: [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP45]], i64 28) +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 99 +; CHECK-NEXT: [[TMP66:%.*]] = ptrtoint ptr addrspace(1) [[TMP65]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP66]], i64 29) +; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 132 +; CHECK-NEXT: [[TMP68:%.*]] = ptrtoint ptr addrspace(1) [[TMP67]] to i64 +; 
CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP68]], i64 28) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 24: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP17]] +; CHECK-NEXT: call void @use_variables() +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP11]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP31]] to i64 +; CHECK-NEXT: [[TMP33:%.*]] = lshr i64 [[TMP32]], 3 +; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[TMP33]], 2147450880 +; CHECK-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP34]] to ptr +; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP35]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = icmp ne i8 [[TMP36]], 0 +; CHECK-NEXT: [[TMP38:%.*]] = and i64 [[TMP32]], 7 +; CHECK-NEXT: [[TMP39:%.*]] = trunc i64 [[TMP38]] to i8 +; CHECK-NEXT: [[TMP40:%.*]] = icmp sge i8 [[TMP39]], [[TMP36]] +; CHECK-NEXT: [[TMP41:%.*]] = and i1 [[TMP37]], [[TMP40]] +; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP41]]) +; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i64 [[TMP42]], 0 +; CHECK-NEXT: br i1 [[TMP43]], label [[ASAN_REPORT:%.*]], label [[TMP46:%.*]], !prof 
[[PROF2]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP41]], label [[TMP44:%.*]], label [[CONDFREE:%.*]] +; CHECK: 44: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[CONDFREE]] +; CHECK: 45: +; CHECK-NEXT: br label [[TMP46]] +; CHECK: 46: +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP31]], align 1 +; CHECK-NEXT: [[TMP47:%.*]] = ptrtoint ptr addrspace(3) [[TMP18]] to i32 +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP47]] +; CHECK-NEXT: [[TMP49:%.*]] = ptrtoint ptr addrspace(1) [[TMP48]] to i64 +; CHECK-NEXT: [[TMP55:%.*]] = add i64 [[TMP49]], 3 +; CHECK-NEXT: [[TMP82:%.*]] = inttoptr i64 [[TMP55]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP83:%.*]] = ptrtoint ptr addrspace(1) [[TMP48]] to i64 +; CHECK-NEXT: [[TMP50:%.*]] = lshr i64 [[TMP83]], 3 +; CHECK-NEXT: [[TMP51:%.*]] = add i64 [[TMP50]], 2147450880 +; CHECK-NEXT: [[TMP52:%.*]] = inttoptr i64 [[TMP51]] to ptr +; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1 +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne i8 [[TMP53]], 0 +; CHECK-NEXT: [[TMP56:%.*]] = and i64 [[TMP83]], 7 +; CHECK-NEXT: [[TMP57:%.*]] = trunc i64 [[TMP56]] to i8 +; CHECK-NEXT: [[TMP58:%.*]] = icmp sge i8 [[TMP57]], [[TMP53]] +; CHECK-NEXT: [[TMP59:%.*]] = and i1 [[TMP54]], [[TMP58]] +; CHECK-NEXT: [[TMP60:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP59]]) +; CHECK-NEXT: [[TMP61:%.*]] = icmp ne i64 [[TMP60]], 0 +; CHECK-NEXT: br i1 [[TMP61]], label [[ASAN_REPORT1:%.*]], label [[TMP64:%.*]], !prof [[PROF2]] +; CHECK: asan.report1: +; CHECK-NEXT: br i1 [[TMP59]], label [[TMP62:%.*]], label [[TMP63:%.*]] +; CHECK: 64: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP83]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP63]] +; CHECK: 65: +; CHECK-NEXT: br label [[TMP64]] +; CHECK: 66: +; CHECK-NEXT: [[TMP84:%.*]] = ptrtoint ptr 
addrspace(1) [[TMP82]] to i64 +; CHECK-NEXT: [[TMP85:%.*]] = lshr i64 [[TMP84]], 3 +; CHECK-NEXT: [[TMP69:%.*]] = add i64 [[TMP85]], 2147450880 +; CHECK-NEXT: [[TMP70:%.*]] = inttoptr i64 [[TMP69]] to ptr +; CHECK-NEXT: [[TMP71:%.*]] = load i8, ptr [[TMP70]], align 1 +; CHECK-NEXT: [[TMP72:%.*]] = icmp ne i8 [[TMP71]], 0 +; CHECK-NEXT: [[TMP73:%.*]] = and i64 [[TMP84]], 7 +; CHECK-NEXT: [[TMP74:%.*]] = trunc i64 [[TMP73]] to i8 +; CHECK-NEXT: [[TMP75:%.*]] = icmp sge i8 [[TMP74]], [[TMP71]] +; CHECK-NEXT: [[TMP76:%.*]] = and i1 [[TMP72]], [[TMP75]] +; CHECK-NEXT: [[TMP77:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP76]]) +; CHECK-NEXT: [[TMP78:%.*]] = icmp ne i64 [[TMP77]], 0 +; CHECK-NEXT: br i1 [[TMP78]], label [[ASAN_REPORT2:%.*]], label [[TMP81:%.*]], !prof [[PROF2]] +; CHECK: asan.report2: +; CHECK-NEXT: br i1 [[TMP76]], label [[TMP79:%.*]], label [[TMP80:%.*]] +; CHECK: 79: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP84]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP80]] +; CHECK: 80: +; CHECK-NEXT: br label [[TMP81]] +; CHECK: 81: +; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP48]], align 2 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @use_variables() + store i8 7, ptr addrspace(3) @lds_1, align 1 + store i32 8, ptr addrspace(3) @lds_2, align 2 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. 
+; CHECK: attributes #[[ATTR0]] = { sanitize_address } +; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR7]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[META3]] = !{i32 0} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll new file mode 100644 index 0000000000000..a70db2259cc3f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param-asan.ll @@ -0,0 +1,152 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if LDS accesses are lowered correctly when LDS is passed as function +; argument to non-kernel. + +@lds_var = internal addrspace(3) global [1024 x i32] poison, align 4 + +;. 
+; CHECK: @llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.my_kernel.md.type { %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 32, i32 4096, i32 5120 } }, no_sanitize_address +; CHECK: @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel], no_sanitize_address +;. +define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address { +; CHECK-LABEL: define void @my_function( +; CHECK-SAME: ptr addrspace(3) [[LDS_ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP3]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(3) [[LDS_ARG]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr addrspace(1) [[TMP6]] to i64 +; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP7]], 3 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 2147450880 +; CHECK-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i8 [[TMP11]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP7]], 7 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], 3 +; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i8 +; CHECK-NEXT: [[TMP16:%.*]] = icmp sge i8 [[TMP15]], [[TMP11]] +; CHECK-NEXT: [[TMP17:%.*]] = and i1 [[TMP12]], [[TMP16]] +; CHECK-NEXT: [[TMP18:%.*]]
= call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP17]]) +; CHECK-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP18]], 0 +; CHECK-NEXT: br i1 [[TMP19]], label [[ASAN_REPORT:%.*]], label [[TMP22:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP17]], label [[TMP20:%.*]], label [[TMP21:%.*]] +; CHECK: 20: +; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP7]]) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP21]] +; CHECK: 21: +; CHECK-NEXT: br label [[TMP22]] +; CHECK: 22: +; CHECK-NEXT: [[LDS_VAL:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4 +; CHECK-NEXT: [[NEW_LDS_VAL:%.*]] = add i32 [[LDS_VAL]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr addrspace(3) [[LDS_ARG]] to i32 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = lshr i64 [[TMP26]], 3 +; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[TMP27]], 2147450880 +; CHECK-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr +; CHECK-NEXT: [[TMP30:%.*]] = load i8, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i8 [[TMP30]], 0 +; CHECK-NEXT: [[TMP32:%.*]] = and i64 [[TMP26]], 7 +; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], 3 +; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[TMP33]] to i8 +; CHECK-NEXT: [[TMP35:%.*]] = icmp sge i8 [[TMP34]], [[TMP30]] +; CHECK-NEXT: [[TMP36:%.*]] = and i1 [[TMP31]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP36]]) +; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i64 [[TMP37]], 0 +; CHECK-NEXT: br i1 [[TMP38]], label [[ASAN_REPORT1:%.*]], label [[TMP41:%.*]], !prof [[PROF1]] +; CHECK: asan.report1: +; CHECK-NEXT: br i1 [[TMP36]], label [[TMP39:%.*]], label [[TMP40:%.*]] +; CHECK: 39: +; CHECK-NEXT: call void @__asan_report_store4(i64 [[TMP26]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label 
[[TMP40]] +; CHECK: 40: +; CHECK-NEXT: br label [[TMP41]] +; CHECK: 41: +; CHECK-NEXT: store i32 [[NEW_LDS_VAL]], ptr addrspace(1) [[TMP25]], align 4 +; CHECK-NEXT: ret void +; + %lds_val = load i32, ptr addrspace(3) %lds_arg, align 4 + %new_lds_val = add i32 %lds_val, 1 + store i32 %new_lds_val, ptr addrspace(3) %lds_arg, align 4 + ret void +} + +define amdgpu_kernel void @my_kernel() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @my_kernel( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP14]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP13]], i64 [[TMP15]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP16]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr 
inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP22]], i64 24) +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 4128 +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr addrspace(1) [[TMP23]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP24]], i64 1024) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 18: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP17:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, i32 [[TMP8]] +; CHECK-NEXT: [[LDS_PTR:%.*]] = getelementptr [1024 x i32], ptr addrspace(3) [[TMP9]], i32 0, i32 0 +; CHECK-NEXT: call void @my_function(ptr addrspace(3) [[LDS_PTR]]) +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP18:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr addrspace(1) [[TMP17]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP20]], i64 [[TMP19]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + %lds_ptr = getelementptr [1024 x i32], ptr addrspace(3) @lds_var, i32 0, i32 0 + call void @my_function(ptr addrspace(3) %lds_ptr) + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. 
+; CHECK: attributes #[[ATTR0]] = { sanitize_address } +; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR7]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[META2]] = !{i32 0} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param.ll new file mode 100644 index 0000000000000..55a36f85dc73a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-function-param.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if LDS accesses are lowered correctly when LDS is passed as function +; argument to non-kernel. + +@lds_var = internal addrspace(3) global [1024 x i32] poison, align 4 + +;. 
+; CHECK: @llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.my_kernel.md.type { %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 32, i32 4096, i32 5120 } }, no_sanitize_address +; CHECK: @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel], no_sanitize_address +;. +define void @my_function(ptr addrspace(3) %lds_arg) sanitize_address { +; CHECK-LABEL: define void @my_function( +; CHECK-SAME: ptr addrspace(3) [[LDS_ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP3]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr addrspace(3) [[LDS_ARG]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP5]] +; CHECK-NEXT: [[LDS_VAL:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4 +; CHECK-NEXT: [[NEW_LDS_VAL:%.*]] = add i32 [[LDS_VAL]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr addrspace(3) [[LDS_ARG]] to i32 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP24]] +; CHECK-NEXT: store i32 [[NEW_LDS_VAL]], ptr addrspace(1) [[TMP25]], align 4 +; CHECK-NEXT: ret void +; + %lds_val = load i32, ptr addrspace(3) %lds_arg, align 4 + %new_lds_val = add i32 %lds_val, 1 + store i32 %new_lds_val, ptr addrspace(3) %lds_arg, align 4 + ret void +} + +define amdgpu_kernel void @my_kernel() sanitize_address { +; CHECK-LABEL: 
define amdgpu_kernel void @my_kernel( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META1:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP11]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP14]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP13]], i64 [[TMP15]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP16]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP22]], i64 24) +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 4128 +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr addrspace(1) [[TMP23]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP24]], i64 
1024) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 18: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP17:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_MY_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, i32 [[TMP8]] +; CHECK-NEXT: [[LDS_PTR:%.*]] = getelementptr [1024 x i32], ptr addrspace(3) [[TMP9]], i32 0, i32 0 +; CHECK-NEXT: call void @my_function(ptr addrspace(3) [[LDS_PTR]]) +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP18:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr addrspace(1) [[TMP17]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP20]], i64 [[TMP19]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + %lds_ptr = getelementptr [1024 x i32], ptr addrspace(3) @lds_var, i32 0, i32 0 + call void @my_function(ptr addrspace(3) %lds_ptr) + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. 
+; CHECK: attributes #[[ATTR0]] = { sanitize_address } +; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 0} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll new file mode 100644 index 0000000000000..1dd391ec6321a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested-asan.ll @@ -0,0 +1,279 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if LDS accesses are lowered correctly when a call is made to nested non-kernel. 
+ +@A = external addrspace(3) global [8 x ptr] +@B = external addrspace(3) global [0 x i32] + +define amdgpu_kernel void @kernel_0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @kernel_0( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP14]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP21]], i64 24) +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) 
[[TMP6]], i64 96 +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 32) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 18: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8 +; CHECK-NEXT: call void @call_store_A() +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @call_store_A() + ret void +} + +define amdgpu_kernel void @kernel_1() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @kernel_1( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, align 4 +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE:%.*]], ptr 
addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 0, i32 2), align 4 +; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP12]], [[TMP20]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP21]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[TMP7]], align 4 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 1), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 3 +; CHECK-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP21]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]]) +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP27]], i64 24) +; CHECK-NEXT: br label [[TMP14]] +; CHECK: 23: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, 
[[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8 +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ] +; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr() +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + %ptr = call ptr @get_B_ptr() + ret void +} + +define amdgpu_kernel void @kernel_2() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @kernel_2( +; CHECK-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 
0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP14]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP21]], i64 24) +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 96 +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 32) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 18: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8 +; CHECK-NEXT: call void @store_A() +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @store_A() + ret void +} + +define amdgpu_kernel void @kernel_3() sanitize_address { +; CHECK-LABEL: 
define amdgpu_kernel void @kernel_3( +; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, align 4 +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 0, i32 2), align 4 +; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP12]], [[TMP20]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP21]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[TMP7]], align 4 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 1), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 3 +; CHECK-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = 
add i32 [[TMP21]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]]) +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP27]], i64 24) +; CHECK-NEXT: br label [[TMP14]] +; CHECK: 23: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8 +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ] +; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr() +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + %ptr = call ptr @get_B_ptr() + ret void +} + +define private void @call_store_A() sanitize_address { +; CHECK-LABEL: define private void @call_store_A( +; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: call void @store_A() +; CHECK-NEXT: ret void +; + call void @store_A() + ret 
void +} + +define private void @store_A() sanitize_address { +; CHECK-LABEL: define private void @store_A( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr +; CHECK-NEXT: store ptr [[TMP10]], ptr null, align 8 +; CHECK-NEXT: ret void +; + store ptr addrspacecast (ptr addrspace(3) @A to ptr), ptr null + ret void +} + +define private ptr @get_B_ptr() sanitize_address { +; CHECK-LABEL: define private ptr @get_B_ptr( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr 
addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr +; CHECK-NEXT: ret ptr [[TMP10]] +; + ret ptr addrspacecast (ptr addrspace(3) @B to ptr) +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: [[META2]] = !{i32 0} +; CHECK: [[META3]] = !{i32 1} +; CHECK: [[META4]] = !{i32 2} +; CHECK: [[META5]] = !{i32 3} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll new file mode 100644 index 0000000000000..ed9107764eb91 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-nested.ll @@ -0,0 +1,279 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if LDS accesses are lowered correctly when a call is made to nested non-kernel. 
+ +@A = external addrspace(3) global [8 x ptr] +@B = external addrspace(3) global [0 x i32] + +define amdgpu_kernel void @kernel_0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @kernel_0( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_0.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP14]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP21]], i64 24) +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) 
[[TMP6]], i64 96 +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 32) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 18: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_0, align 8 +; CHECK-NEXT: call void @call_store_A() +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @call_store_A() + ret void +} + +define amdgpu_kernel void @kernel_1() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @kernel_1( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, align 4 +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE:%.*]], ptr 
addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 0, i32 2), align 4 +; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP12]], [[TMP20]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP21]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[TMP7]], align 4 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 1), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 3 +; CHECK-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_1_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_1.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP21]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]]) +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP27]], i64 24) +; CHECK-NEXT: br label [[TMP14]] +; CHECK: 23: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, 
[[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_1, align 8 +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_1.dynlds) ] +; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr() +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + %ptr = call ptr @get_B_ptr() + ret void +} + +define amdgpu_kernel void @kernel_2() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @kernel_2( +; CHECK-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META4:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_2_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_2.md, i32 
0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP12]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP14]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP21]], i64 24) +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 96 +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 32) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 18: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_2, align 8 +; CHECK-NEXT: call void @store_A() +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP16:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr addrspace(1) [[TMP15]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP18]], i64 [[TMP17]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @store_A() + ret void +} + +define amdgpu_kernel void @kernel_3() sanitize_address { +; CHECK-LABEL: 
define amdgpu_kernel void @kernel_3( +; CHECK-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META5:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP14:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, align 4 +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 0, i32 2), align 4 +; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[TMP12]], [[TMP20]] +; CHECK-NEXT: [[TMP6:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP6]], i64 15 +; CHECK-NEXT: store i32 [[TMP21]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(4) [[TMP7]], align 4 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 1), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 3 +; CHECK-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 4 +; CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_KERNEL_3_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.kernel_3.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = 
add i32 [[TMP21]], [[TMP11]] +; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 +; CHECK-NEXT: [[TMP19:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP16]], i64 [[TMP18]]) +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP19]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8 +; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(1) [[TMP26]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP27]], i64 24) +; CHECK-NEXT: br label [[TMP14]] +; CHECK: 23: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP22:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.kernel_3, align 8 +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel_3.dynlds) ] +; CHECK-NEXT: [[PTR:%.*]] = call ptr @get_B_ptr() +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64 +; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP25]], i64 [[TMP24]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + %ptr = call ptr @get_B_ptr() + ret void +} + +define private void @call_store_A() sanitize_address { +; CHECK-LABEL: define private void @call_store_A( +; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: call void @store_A() +; CHECK-NEXT: ret void +; + call void @store_A() + ret 
void +} + +define private void @store_A() sanitize_address { +; CHECK-LABEL: define private void @store_A( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr +; CHECK-NEXT: store ptr [[TMP10]], ptr null, align 8 +; CHECK-NEXT: ret void +; + store ptr addrspacecast (ptr addrspace(3) @A to ptr), ptr null + ret void +} + +define private ptr @get_B_ptr() sanitize_address { +; CHECK-LABEL: define private ptr @get_B_ptr( +; CHECK-SAME: ) #[[ATTR2]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr 
addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr +; CHECK-NEXT: ret ptr [[TMP10]] +; + ret ptr addrspacecast (ptr addrspace(3) @B to ptr) +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: [[META2]] = !{i32 0} +; CHECK: [[META3]] = !{i32 1} +; CHECK: [[META4]] = !{i32 2} +; CHECK: [[META5]] = !{i32 3} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll new file mode 100644 index 0000000000000..11e912287c7f7 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -amdgpu-asan-instrument-lds=false -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if static LDS is lowered correctly when a non-kernel with LDS accesses is called from kernel. +@lds_1 = internal addrspace(3) global [1 x i8] poison, align 1 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 2 +@lds_3 = external addrspace(3) global [3 x i8], align 4 +@lds_4 = external addrspace(3) global [4 x i8], align 8 + +;. 
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 96, i32 3, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 128, i32 4, i32 32 } }, no_sanitize_address +; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address +;. 
+define void @use_variables() sanitize_address { +; CHECK-LABEL: define void @use_variables( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP4]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) [[TMP5]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]] +; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr +; CHECK-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr +; CHECK-NEXT: store i8 3, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP14]] +; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP31]], align 8 +; CHECK-NEXT: ret void +; + %X = addrspacecast ptr addrspace(3) @lds_3 to ptr + store i8 3, ptr addrspacecast( ptr addrspace(3) @lds_3 to ptr), align 4 + store i8 3, ptr addrspace(3) @lds_4, 
align 8 + ret void +} + +define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META1:![0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP16]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[TMP23]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP15]], i64 [[TMP24]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP26]], i64 24) +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 33 +; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(1) [[TMP27]] to i64 +; CHECK-NEXT: call 
void @__asan_poison_region(i64 [[TMP28]], i64 31) +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 68 +; CHECK-NEXT: [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP45]], i64 28) +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 99 +; CHECK-NEXT: [[TMP66:%.*]] = ptrtoint ptr addrspace(1) [[TMP65]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP66]], i64 29) +; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 132 +; CHECK-NEXT: [[TMP68:%.*]] = ptrtoint ptr addrspace(1) [[TMP67]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP68]], i64 28) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 24: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP17]] +; CHECK-NEXT: call void @use_variables() +; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr addrspace(3) [[TMP11]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP30]] +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP31]], align 1 +; CHECK-NEXT: [[TMP47:%.*]] = ptrtoint ptr addrspace(3) [[TMP18]] 
to i32 +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP47]] +; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP48]], align 2 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + call void @use_variables() + store i8 7, ptr addrspace(3) @lds_1, align 1 + store i32 8, ptr addrspace(3) @lds_2, align 2 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address } +; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="8" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 0} +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll new file mode 100644 index 0000000000000..301bda7e0086e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-asan.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if static LDS accesses in kernel are lowered correctly. +@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8 + +;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 } }, no_sanitize_address +;. 
+define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP16]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP23]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP15]], i64 [[TMP11]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP41:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP41]], i64 24) +; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 33 +; CHECK-NEXT: [[TMP62:%.*]] = ptrtoint ptr addrspace(1) [[TMP61]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP62]], i64 31) +; CHECK-NEXT: 
[[TMP63:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 68 +; CHECK-NEXT: [[TMP64:%.*]] = ptrtoint ptr addrspace(1) [[TMP63]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP64]], i64 28) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 20: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP17]] +; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(3) [[TMP18]] to i32 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = ptrtoint ptr addrspace(1) [[TMP27]] to i64 +; CHECK-NEXT: [[TMP29:%.*]] = lshr i64 [[TMP28]], 3 +; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[TMP29]], 2147450880 +; CHECK-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP30]] to ptr +; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP31]], align 1 +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne i8 [[TMP32]], 0 +; CHECK-NEXT: [[TMP34:%.*]] = and i64 [[TMP28]], 7 +; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[TMP34]] to i8 +; CHECK-NEXT: [[TMP36:%.*]] = icmp sge i8 [[TMP35]], [[TMP32]] +; CHECK-NEXT: [[TMP37:%.*]] = and i1 [[TMP33]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP37]]) +; CHECK-NEXT: [[TMP39:%.*]] = 
icmp ne i64 [[TMP38]], 0 +; CHECK-NEXT: br i1 [[TMP39]], label [[ASAN_REPORT:%.*]], label [[TMP42:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP37]], label [[TMP40:%.*]], label [[CONDFREE:%.*]] +; CHECK: 40: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP28]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[CONDFREE]] +; CHECK: 41: +; CHECK-NEXT: br label [[TMP42]] +; CHECK: 42: +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP27]], align 4 +; CHECK-NEXT: [[TMP43:%.*]] = ptrtoint ptr addrspace(3) [[TMP24]] to i32 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP43]] +; CHECK-NEXT: [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP44]] to i64 +; CHECK-NEXT: [[TMP51:%.*]] = add i64 [[TMP45]], 3 +; CHECK-NEXT: [[TMP78:%.*]] = inttoptr i64 [[TMP51]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP79:%.*]] = ptrtoint ptr addrspace(1) [[TMP44]] to i64 +; CHECK-NEXT: [[TMP46:%.*]] = lshr i64 [[TMP79]], 3 +; CHECK-NEXT: [[TMP47:%.*]] = add i64 [[TMP46]], 2147450880 +; CHECK-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr +; CHECK-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1 +; CHECK-NEXT: [[TMP50:%.*]] = icmp ne i8 [[TMP49]], 0 +; CHECK-NEXT: [[TMP52:%.*]] = and i64 [[TMP79]], 7 +; CHECK-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i8 +; CHECK-NEXT: [[TMP54:%.*]] = icmp sge i8 [[TMP53]], [[TMP49]] +; CHECK-NEXT: [[TMP55:%.*]] = and i1 [[TMP50]], [[TMP54]] +; CHECK-NEXT: [[TMP56:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP55]]) +; CHECK-NEXT: [[TMP57:%.*]] = icmp ne i64 [[TMP56]], 0 +; CHECK-NEXT: br i1 [[TMP57]], label [[ASAN_REPORT1:%.*]], label [[TMP60:%.*]], !prof [[PROF2]] +; CHECK: asan.report1: +; CHECK-NEXT: br i1 [[TMP55]], label [[TMP58:%.*]], label [[TMP59:%.*]] +; CHECK: 60: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP79]]) #[[ATTR6]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label 
[[TMP59]] +; CHECK: 61: +; CHECK-NEXT: br label [[TMP60]] +; CHECK: 62: +; CHECK-NEXT: [[TMP80:%.*]] = ptrtoint ptr addrspace(1) [[TMP78]] to i64 +; CHECK-NEXT: [[TMP81:%.*]] = lshr i64 [[TMP80]], 3 +; CHECK-NEXT: [[TMP65:%.*]] = add i64 [[TMP81]], 2147450880 +; CHECK-NEXT: [[TMP66:%.*]] = inttoptr i64 [[TMP65]] to ptr +; CHECK-NEXT: [[TMP67:%.*]] = load i8, ptr [[TMP66]], align 1 +; CHECK-NEXT: [[TMP68:%.*]] = icmp ne i8 [[TMP67]], 0 +; CHECK-NEXT: [[TMP69:%.*]] = and i64 [[TMP80]], 7 +; CHECK-NEXT: [[TMP70:%.*]] = trunc i64 [[TMP69]] to i8 +; CHECK-NEXT: [[TMP71:%.*]] = icmp sge i8 [[TMP70]], [[TMP67]] +; CHECK-NEXT: [[TMP72:%.*]] = and i1 [[TMP68]], [[TMP71]] +; CHECK-NEXT: [[TMP73:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP72]]) +; CHECK-NEXT: [[TMP74:%.*]] = icmp ne i64 [[TMP73]], 0 +; CHECK-NEXT: br i1 [[TMP74]], label [[ASAN_REPORT2:%.*]], label [[TMP77:%.*]], !prof [[PROF2]] +; CHECK: asan.report2: +; CHECK-NEXT: br i1 [[TMP72]], label [[TMP75:%.*]], label [[TMP76:%.*]] +; CHECK: 75: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP80]]) #[[ATTR6]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP76]] +; CHECK: 76: +; CHECK-NEXT: br label [[TMP77]] +; CHECK: 77: +; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP44]], align 2 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + store i8 7, ptr addrspace(3) @lds_1, align 4 + store i32 8, ptr addrspace(3) @lds_2, align 2 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 
4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR6]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll new file mode 100644 index 0000000000000..02a241f947748 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomic-cmpxchg-asan.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +@lds_1 = internal addrspace(3) global [1 x i32] poison, align 4 + +;. +; CHECK: @llvm.amdgcn.sw.lds.atomic_xchg_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.atomic_xchg_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.atomic_xchg_kernel.md.type { %llvm.amdgcn.sw.lds.atomic_xchg_kernel.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.atomic_xchg_kernel.md.item { i32 32, i32 4, i32 32 } }, no_sanitize_address +;. 
+define amdgpu_kernel void @atomic_xchg_kernel(ptr addrspace(1) %out, [8 x i32], [8 x i32], i32 %swap) sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @atomic_xchg_kernel( +; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], [8 x i32] [[TMP0:%.*]], [8 x i32] [[TMP1:%.*]], i32 [[SWAP:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[MALLOC:%.*]], label [[TMP20:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMIC_XCHG_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomic_xchg_kernel.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMIC_XCHG_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomic_xchg_kernel.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP11]], i64 [[TMP13]]) +; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP15]], ptr addrspace(3) @llvm.amdgcn.sw.lds.atomic_xchg_kernel, align 8 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP15]], i64 8 +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 
24) +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP15]], i64 36 +; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28) +; CHECK-NEXT: br label [[TMP20]] +; CHECK: 20: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.atomic_xchg_kernel, align 8 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMIC_XCHG_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomic_xchg_kernel.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.atomic_xchg_kernel, i32 [[TMP22]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP23]], i32 4 +; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr addrspace(3) [[GEP]] to i32 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = add i64 [[TMP26]], 3 +; CHECK-NEXT: [[TMP59:%.*]] = inttoptr i64 [[TMP32]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP60:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64 +; CHECK-NEXT: [[TMP27:%.*]] = lshr i64 [[TMP60]], 3 +; CHECK-NEXT: [[TMP28:%.*]] = add i64 [[TMP27]], 2147450880 +; CHECK-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr +; CHECK-NEXT: [[TMP30:%.*]] = load i8, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i8 [[TMP30]], 0 +; CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP60]], 7 +; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[TMP33]] to i8 +; CHECK-NEXT: [[TMP35:%.*]] = icmp sge i8 [[TMP34]], [[TMP30]] +; CHECK-NEXT: [[TMP36:%.*]] = and i1 [[TMP31]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = call i64 
@llvm.amdgcn.ballot.i64(i1 [[TMP36]]) +; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i64 [[TMP37]], 0 +; CHECK-NEXT: br i1 [[TMP38]], label [[ASAN_REPORT:%.*]], label [[TMP41:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP36]], label [[TMP39:%.*]], label [[TMP40:%.*]] +; CHECK: 41: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP60]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP40]] +; CHECK: 42: +; CHECK-NEXT: br label [[TMP41]] +; CHECK: 43: +; CHECK-NEXT: [[TMP61:%.*]] = ptrtoint ptr addrspace(1) [[TMP59]] to i64 +; CHECK-NEXT: [[TMP62:%.*]] = lshr i64 [[TMP61]], 3 +; CHECK-NEXT: [[TMP46:%.*]] = add i64 [[TMP62]], 2147450880 +; CHECK-NEXT: [[TMP47:%.*]] = inttoptr i64 [[TMP46]] to ptr +; CHECK-NEXT: [[TMP48:%.*]] = load i8, ptr [[TMP47]], align 1 +; CHECK-NEXT: [[TMP49:%.*]] = icmp ne i8 [[TMP48]], 0 +; CHECK-NEXT: [[TMP50:%.*]] = and i64 [[TMP61]], 7 +; CHECK-NEXT: [[TMP51:%.*]] = trunc i64 [[TMP50]] to i8 +; CHECK-NEXT: [[TMP52:%.*]] = icmp sge i8 [[TMP51]], [[TMP48]] +; CHECK-NEXT: [[TMP53:%.*]] = and i1 [[TMP49]], [[TMP52]] +; CHECK-NEXT: [[TMP54:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP53]]) +; CHECK-NEXT: [[TMP55:%.*]] = icmp ne i64 [[TMP54]], 0 +; CHECK-NEXT: br i1 [[TMP55]], label [[ASAN_REPORT1:%.*]], label [[TMP58:%.*]], !prof [[PROF2]] +; CHECK: asan.report1: +; CHECK-NEXT: br i1 [[TMP53]], label [[TMP56:%.*]], label [[TMP57:%.*]] +; CHECK: 56: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP61]]) #[[ATTR6]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP57]] +; CHECK: 57: +; CHECK-NEXT: br label [[TMP58]] +; CHECK: 58: +; CHECK-NEXT: [[TMP42:%.*]] = cmpxchg ptr addrspace(1) [[TMP25]], i32 7, i32 [[SWAP]] seq_cst monotonic, align 4 +; CHECK-NEXT: [[RESULT:%.*]] = extractvalue { i32, i1 } [[TMP42]], 0 +; CHECK-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT]], align 4 +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; 
CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP43:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 +; CHECK-NEXT: [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP45]], i64 [[TMP44]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + %gep = getelementptr i32, ptr addrspace(3) @lds_1, i32 4 + %pair = cmpxchg ptr addrspace(3) %gep, i32 7, i32 %swap seq_cst monotonic + %result = extractvalue { i32, i1 } %pair, 0 + store i32 %result, ptr addrspace(1) %out, align 4 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR6]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +;. 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll new file mode 100644 index 0000000000000..b87b3fd824dd3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-atomicrmw-asan.ll @@ -0,0 +1,214 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +@lds_1 = internal addrspace(3) global [1 x i32] poison, align 4 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 4 + +; Test to check if static LDS accesses in kernel are lowered correctly. +;. +; CHECK: @llvm.amdgcn.sw.lds.atomicrmw_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.atomicrmw_kernel.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.atomicrmw_kernel.md.type { %llvm.amdgcn.sw.lds.atomicrmw_kernel.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.atomicrmw_kernel.md.item { i32 32, i32 4, i32 32 }, %llvm.amdgcn.sw.lds.atomicrmw_kernel.md.item { i32 64, i32 4, i32 32 } }, no_sanitize_address +;. 
+define amdgpu_kernel void @atomicrmw_kernel(ptr addrspace(1) %arg0) sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @atomicrmw_kernel( +; CHECK-SAME: ptr addrspace(1) [[ARG0:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP0]], [[TMP26]] +; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP64]], [[TMP45]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP65]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP20:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMICRMW_KERNEL_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomicrmw_kernel.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMICRMW_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomicrmw_kernel.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.atomicrmw_kernel, align 8 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8 +; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24) +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 36 +; 
CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 28) +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 68 +; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr addrspace(1) [[TMP18]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP19]], i64 28) +; CHECK-NEXT: br label [[TMP20]] +; CHECK: 20: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP21:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.atomicrmw_kernel, align 8 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMICRMW_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomicrmw_kernel.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.atomicrmw_kernel, i32 [[TMP22]] +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_ATOMICRMW_KERNEL_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.atomicrmw_kernel.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.atomicrmw_kernel, i32 [[TMP24]] +; CHECK-NEXT: [[TMP1:%.*]] = load volatile i32, ptr addrspace(1) [[ARG0]], align 4 +; CHECK-NEXT: [[TMP27:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64 +; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[TMP29]], 3 +; CHECK-NEXT: [[TMP98:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP99:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = lshr i64 [[TMP99]], 3 +; CHECK-NEXT: [[TMP31:%.*]] = add i64 
[[TMP30]], 2147450880 +; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr +; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 1 +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i8 [[TMP33]], 0 +; CHECK-NEXT: [[TMP36:%.*]] = and i64 [[TMP99]], 7 +; CHECK-NEXT: [[TMP37:%.*]] = trunc i64 [[TMP36]] to i8 +; CHECK-NEXT: [[TMP38:%.*]] = icmp sge i8 [[TMP37]], [[TMP33]] +; CHECK-NEXT: [[TMP39:%.*]] = and i1 [[TMP34]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP39]]) +; CHECK-NEXT: [[TMP41:%.*]] = icmp ne i64 [[TMP40]], 0 +; CHECK-NEXT: br i1 [[TMP41]], label [[ASAN_REPORT:%.*]], label [[TMP44:%.*]], !prof [[PROF2:![0-9]+]] +; CHECK: asan.report: +; CHECK-NEXT: br i1 [[TMP39]], label [[TMP42:%.*]], label [[TMP43:%.*]] +; CHECK: 44: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP99]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP43]] +; CHECK: 45: +; CHECK-NEXT: br label [[TMP44]] +; CHECK: 46: +; CHECK-NEXT: [[TMP100:%.*]] = ptrtoint ptr addrspace(1) [[TMP98]] to i64 +; CHECK-NEXT: [[TMP101:%.*]] = lshr i64 [[TMP100]], 3 +; CHECK-NEXT: [[TMP102:%.*]] = add i64 [[TMP101]], 2147450880 +; CHECK-NEXT: [[TMP103:%.*]] = inttoptr i64 [[TMP102]] to ptr +; CHECK-NEXT: [[TMP104:%.*]] = load i8, ptr [[TMP103]], align 1 +; CHECK-NEXT: [[TMP105:%.*]] = icmp ne i8 [[TMP104]], 0 +; CHECK-NEXT: [[TMP106:%.*]] = and i64 [[TMP100]], 7 +; CHECK-NEXT: [[TMP54:%.*]] = trunc i64 [[TMP106]] to i8 +; CHECK-NEXT: [[TMP107:%.*]] = icmp sge i8 [[TMP54]], [[TMP104]] +; CHECK-NEXT: [[TMP108:%.*]] = and i1 [[TMP105]], [[TMP107]] +; CHECK-NEXT: [[TMP109:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP108]]) +; CHECK-NEXT: [[TMP110:%.*]] = icmp ne i64 [[TMP109]], 0 +; CHECK-NEXT: br i1 [[TMP110]], label [[ASAN_REPORT1:%.*]], label [[TMP111:%.*]], !prof [[PROF2]] +; CHECK: asan.report1: +; CHECK-NEXT: br i1 [[TMP108]], label [[TMP112:%.*]], label [[TMP113:%.*]] +; CHECK: 59: +; CHECK-NEXT: call 
void @__asan_report_store1(i64 [[TMP100]]) #[[ATTR6]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP113]] +; CHECK: 60: +; CHECK-NEXT: br label [[TMP111]] +; CHECK: 61: +; CHECK-NEXT: [[TMP2:%.*]] = atomicrmw umin ptr addrspace(1) [[TMP28]], i32 [[TMP1]] seq_cst, align 4 +; CHECK-NEXT: [[TMP46:%.*]] = ptrtoint ptr addrspace(3) [[TMP23]] to i32 +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP46]] +; CHECK-NEXT: [[TMP48:%.*]] = ptrtoint ptr addrspace(1) [[TMP47]] to i64 +; CHECK-NEXT: [[TMP114:%.*]] = add i64 [[TMP48]], 3 +; CHECK-NEXT: [[TMP115:%.*]] = inttoptr i64 [[TMP114]] to ptr addrspace(1) +; CHECK-NEXT: [[TMP116:%.*]] = ptrtoint ptr addrspace(1) [[TMP47]] to i64 +; CHECK-NEXT: [[TMP49:%.*]] = lshr i64 [[TMP116]], 3 +; CHECK-NEXT: [[TMP50:%.*]] = add i64 [[TMP49]], 2147450880 +; CHECK-NEXT: [[TMP51:%.*]] = inttoptr i64 [[TMP50]] to ptr +; CHECK-NEXT: [[TMP52:%.*]] = load i8, ptr [[TMP51]], align 1 +; CHECK-NEXT: [[TMP53:%.*]] = icmp ne i8 [[TMP52]], 0 +; CHECK-NEXT: [[TMP55:%.*]] = and i64 [[TMP116]], 7 +; CHECK-NEXT: [[TMP56:%.*]] = trunc i64 [[TMP55]] to i8 +; CHECK-NEXT: [[TMP57:%.*]] = icmp sge i8 [[TMP56]], [[TMP52]] +; CHECK-NEXT: [[TMP58:%.*]] = and i1 [[TMP53]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP58]]) +; CHECK-NEXT: [[TMP60:%.*]] = icmp ne i64 [[TMP59]], 0 +; CHECK-NEXT: br i1 [[TMP60]], label [[ASAN_REPORT2:%.*]], label [[TMP63:%.*]], !prof [[PROF2]] +; CHECK: asan.report2: +; CHECK-NEXT: br i1 [[TMP58]], label [[TMP61:%.*]], label [[TMP62:%.*]] +; CHECK: 80: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP116]]) #[[ATTR6]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP62]] +; CHECK: 81: +; CHECK-NEXT: br label [[TMP63]] +; CHECK: 82: +; CHECK-NEXT: [[TMP117:%.*]] = ptrtoint ptr addrspace(1) [[TMP115]] to i64 +; CHECK-NEXT: [[TMP118:%.*]] = lshr i64 [[TMP117]], 3 +; CHECK-NEXT: 
[[TMP119:%.*]] = add i64 [[TMP118]], 2147450880 +; CHECK-NEXT: [[TMP120:%.*]] = inttoptr i64 [[TMP119]] to ptr +; CHECK-NEXT: [[TMP87:%.*]] = load i8, ptr [[TMP120]], align 1 +; CHECK-NEXT: [[TMP88:%.*]] = icmp ne i8 [[TMP87]], 0 +; CHECK-NEXT: [[TMP89:%.*]] = and i64 [[TMP117]], 7 +; CHECK-NEXT: [[TMP90:%.*]] = trunc i64 [[TMP89]] to i8 +; CHECK-NEXT: [[TMP91:%.*]] = icmp sge i8 [[TMP90]], [[TMP87]] +; CHECK-NEXT: [[TMP92:%.*]] = and i1 [[TMP88]], [[TMP91]] +; CHECK-NEXT: [[TMP93:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP92]]) +; CHECK-NEXT: [[TMP94:%.*]] = icmp ne i64 [[TMP93]], 0 +; CHECK-NEXT: br i1 [[TMP94]], label [[ASAN_REPORT3:%.*]], label [[TMP97:%.*]], !prof [[PROF2]] +; CHECK: asan.report3: +; CHECK-NEXT: br i1 [[TMP92]], label [[TMP95:%.*]], label [[TMP96:%.*]] +; CHECK: 95: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP117]]) #[[ATTR6]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP96]] +; CHECK: 96: +; CHECK-NEXT: br label [[TMP97]] +; CHECK: 97: +; CHECK-NEXT: [[TMP3:%.*]] = atomicrmw umax ptr addrspace(1) [[TMP47]], i32 [[TMP1]] seq_cst, align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP66:%.*]] = ptrtoint ptr addrspace(3) [[TMP25]] to i32 +; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP21]], i32 [[TMP66]] +; CHECK-NEXT: [[TMP68:%.*]] = ptrtoint ptr addrspace(1) [[TMP67]] to i64 +; CHECK-NEXT: [[TMP69:%.*]] = lshr i64 [[TMP68]], 3 +; CHECK-NEXT: [[TMP70:%.*]] = add i64 [[TMP69]], 2147450880 +; CHECK-NEXT: [[TMP71:%.*]] = inttoptr i64 [[TMP70]] to ptr +; CHECK-NEXT: [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1 +; CHECK-NEXT: [[TMP73:%.*]] = icmp ne i8 [[TMP72]], 0 +; CHECK-NEXT: [[TMP74:%.*]] = and i64 [[TMP68]], 7 +; CHECK-NEXT: [[TMP75:%.*]] = add i64 [[TMP74]], 3 +; CHECK-NEXT: [[TMP76:%.*]] = trunc i64 [[TMP75]] to i8 +; CHECK-NEXT: [[TMP77:%.*]] = icmp sge i8 [[TMP76]], [[TMP72]] +; CHECK-NEXT: [[TMP78:%.*]] = and i1 
[[TMP73]], [[TMP77]] +; CHECK-NEXT: [[TMP79:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP78]]) +; CHECK-NEXT: [[TMP80:%.*]] = icmp ne i64 [[TMP79]], 0 +; CHECK-NEXT: br i1 [[TMP80]], label [[ASAN_REPORT4:%.*]], label [[TMP83:%.*]], !prof [[PROF2]] +; CHECK: asan.report4: +; CHECK-NEXT: br i1 [[TMP78]], label [[TMP81:%.*]], label [[TMP82:%.*]] +; CHECK: 115: +; CHECK-NEXT: call void @__asan_report_store4(i64 [[TMP68]]) #[[ATTR6]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label [[TMP82]] +; CHECK: 116: +; CHECK-NEXT: br label [[TMP83]] +; CHECK: 117: +; CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(1) [[TMP67]], align 4 +; CHECK-NEXT: br label [[CONDFREE:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP84:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP85:%.*]] = ptrtoint ptr [[TMP84]] to i64 +; CHECK-NEXT: [[TMP86:%.*]] = ptrtoint ptr addrspace(1) [[TMP21]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP86]], i64 [[TMP85]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + %1 = load volatile i32, ptr addrspace(1) %arg0 + %2 = atomicrmw umin ptr addrspace(3) @lds_1, i32 %1 seq_cst + %3 = atomicrmw umax ptr addrspace(3) @lds_1, i32 %1 seq_cst + %4 = add i32 %2, %3 + store i32 %4, ptr addrspace(3) @lds_2, align 4 + ret void +} +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. 
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR6]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +;. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test.ll new file mode 100644 index 0000000000000..806a4aa70edcf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 +; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -amdgpu-asan-instrument-lds=false -mtriple=amdgcn-amd-amdhsa | FileCheck %s + +; Test to check if static LDS accesses in kernel are lowered correctly. +@lds_1 = internal addrspace(3) global [1 x i8] poison, align 4 +@lds_2 = internal addrspace(3) global [1 x i32] poison, align 8 + +;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 1, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 64, i32 4, i32 32 } }, no_sanitize_address +;. 
+define amdgpu_kernel void @k0() sanitize_address { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: WId: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]] +; CHECK: Malloc: +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP16]] to i64 +; CHECK-NEXT: [[TMP23:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP23]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP15]], i64 [[TMP11]]) +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP6]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP41:%.*]] = ptrtoint ptr addrspace(1) [[TMP25]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP41]], i64 24) +; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 33 +; CHECK-NEXT: [[TMP62:%.*]] = ptrtoint ptr addrspace(1) [[TMP61]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP62]], i64 31) +; CHECK-NEXT: 
[[TMP63:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 68 +; CHECK-NEXT: [[TMP64:%.*]] = ptrtoint ptr addrspace(1) [[TMP63]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP64]], i64 28) +; CHECK-NEXT: br label [[TMP7]] +; CHECK: 20: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP17]] +; CHECK-NEXT: [[TMP26:%.*]] = ptrtoint ptr addrspace(3) [[TMP18]] to i32 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP26]] +; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP27]], align 4 +; CHECK-NEXT: [[TMP43:%.*]] = ptrtoint ptr addrspace(3) [[TMP24]] to i32 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP43]] +; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP44]], align 2 +; CHECK-NEXT: br label [[CONDFREE1:%.*]] +; CHECK: CondFree: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]] +; CHECK: Free: +; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64 +; 
CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]]) +; CHECK-NEXT: br label [[END]] +; CHECK: End: +; CHECK-NEXT: ret void +; + store i8 7, ptr addrspace(3) @lds_1, align 4 + store i32 8, ptr addrspace(3) @lds_2, align 2 + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"nosanitize_address", i32 1} + +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="8" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +;.