diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 570f089e91469..37d6084ca1d25 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -338,6 +338,12 @@ static cl::opt EnableScalarIRPasses( cl::init(true), cl::Hidden); +static cl::opt + EnableSwLowerLDS("amdgpu-enable-sw-lower-lds", + cl::desc("Enable lowering of lds to global memory pass " + "and asan instrument resulting IR."), + cl::init(true), cl::Hidden); + static cl::opt EnableLowerModuleLDS( "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), @@ -759,6 +765,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { // We want to support the -lto-partitions=N option as "best effort". // For that, we need to lower LDS earlier in the pipeline before the // module is partitioned for codegen. + if (EnableSwLowerLDS) + PM.addPass(AMDGPUSwLowerLDSPass(*this)); if (EnableLowerModuleLDS) PM.addPass(AMDGPULowerModuleLDSPass(*this)); @@ -1068,6 +1076,10 @@ void AMDGPUPassConfig::addIRPasses() { // Replace OpenCL enqueued block function pointers with global variables. addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass()); + // Lower LDS accesses to global memory pass if address sanitizer is enabled. + if (EnableSwLowerLDS) + addPass(createAMDGPUSwLowerLDSLegacyPass(&TM)); + // Runs before PromoteAlloca so the latter can account for function uses if (EnableLowerModuleLDS) { addPass(createAMDGPULowerModuleLDSLegacyPass(&TM)); diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 1b1ea52520c0b..57575f786152b 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -39,6 +39,7 @@ ; GCN-O0-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O0-NEXT: Function Alias Analysis Results ; GCN-O0-NEXT: Lower OpenCL enqueued blocks +; GCN-O0-NEXT: AMDGPU Software lowering of LDS ; GCN-O0-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O0-NEXT: FunctionPass Manager ; GCN-O0-NEXT: Expand Atomic instructions @@ -185,6 +186,7 @@ ; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Lower OpenCL enqueued blocks +; GCN-O1-NEXT: AMDGPU Software lowering of LDS ; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Infer address spaces @@ -460,6 +462,7 @@ ; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O1-OPTS-NEXT: Function Alias Analysis Results ; GCN-O1-OPTS-NEXT: Lower OpenCL enqueued blocks +; GCN-O1-OPTS-NEXT: AMDGPU Software lowering of LDS ; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O1-OPTS-NEXT: FunctionPass Manager ; GCN-O1-OPTS-NEXT: Infer address spaces @@ -765,6 +768,7 @@ ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Lower OpenCL enqueued blocks +; GCN-O2-NEXT: AMDGPU Software lowering of LDS ; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: Infer address spaces @@ -1074,6 +1078,7 @@ ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Lower OpenCL enqueued blocks +; GCN-O3-NEXT: AMDGPU Software lowering of LDS ; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: Infer address spaces