diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 4f8882ed1cc96..ee3a7c54460b4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1610,6 +1610,16 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, return false; } + if ((AS == AMDGPUAS::CONSTANT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) && + AM.BaseOffs < 0) { + // Scalar (non-buffer) loads can only use a negative offset if + // soffset+offset is non-negative. Since the compiler can only prove that + // in a few special cases, it is safer to claim that negative offsets are + // not supported. + return false; + } + if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg. return true; diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll index 41d2360dd5e1e..c7f7f30a5e6bd 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll @@ -279,33 +279,19 @@ end: } define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) { -; GFX678-LABEL: test_sink_smem_offset_neg400: -; GFX678: ; %bb.0: ; %entry -; GFX678-NEXT: s_add_u32 s0, s0, 0xfffffe70 -; GFX678-NEXT: s_addc_u32 s1, s1, -1 -; GFX678-NEXT: .LBB5_1: ; %loop -; GFX678-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX678-NEXT: s_waitcnt lgkmcnt(0) -; GFX678-NEXT: s_load_dword s3, s[0:1], 0x0 -; GFX678-NEXT: s_add_i32 s2, s2, -1 -; GFX678-NEXT: s_cmp_lg_u32 s2, 0 -; GFX678-NEXT: s_cbranch_scc1 .LBB5_1 -; GFX678-NEXT: ; %bb.2: ; %end -; GFX678-NEXT: s_endpgm -; -; GFX9-LABEL: test_sink_smem_offset_neg400: -; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: .LBB5_1: ; %loop -; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-NEXT: s_add_i32 s2, s2, -1 -; GFX9-NEXT: s_add_u32 s4, s0, 0xfffffe70 -; GFX9-NEXT: s_addc_u32 s5, s1, -1 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) 
-; GFX9-NEXT: s_load_dword s3, s[4:5], 0x0 -; GFX9-NEXT: s_cmp_lg_u32 s2, 0 -; GFX9-NEXT: s_cbranch_scc1 .LBB5_1 -; GFX9-NEXT: ; %bb.2: ; %end -; GFX9-NEXT: s_endpgm +; GFX6789-LABEL: test_sink_smem_offset_neg400: +; GFX6789: ; %bb.0: ; %entry +; GFX6789-NEXT: s_add_u32 s0, s0, 0xfffffe70 +; GFX6789-NEXT: s_addc_u32 s1, s1, -1 +; GFX6789-NEXT: .LBB5_1: ; %loop +; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX6789-NEXT: s_waitcnt lgkmcnt(0) +; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0 +; GFX6789-NEXT: s_add_i32 s2, s2, -1 +; GFX6789-NEXT: s_cmp_lg_u32 s2, 0 +; GFX6789-NEXT: s_cbranch_scc1 .LBB5_1 +; GFX6789-NEXT: ; %bb.2: ; %end +; GFX6789-NEXT: s_endpgm ; ; GFX12-LABEL: test_sink_smem_offset_neg400: ; GFX12: ; %bb.0: ; %entry @@ -337,3 +323,52 @@ loop: end: ret void } + +; Same as test_sink_smem_offset_neg400, but for addrspace(6) (constant 32-bit). +define amdgpu_cs void @test_sink_smem_offset_neg400_32bit(ptr addrspace(6) inreg %ptr, i32 inreg %val) { +; GFX6789-LABEL: test_sink_smem_offset_neg400_32bit: +; GFX6789: ; %bb.0: ; %entry +; GFX6789-NEXT: s_add_i32 s2, s0, 0xfffffe70 +; GFX6789-NEXT: s_mov_b32 s3, 0 +; GFX6789-NEXT: .LBB6_1: ; %loop +; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX6789-NEXT: s_waitcnt lgkmcnt(0) +; GFX6789-NEXT: s_load_dword s0, s[2:3], 0x0 +; GFX6789-NEXT: s_add_i32 s1, s1, -1 +; GFX6789-NEXT: s_cmp_lg_u32 s1, 0 +; GFX6789-NEXT: s_cbranch_scc1 .LBB6_1 +; GFX6789-NEXT: ; %bb.2: ; %end +; GFX6789-NEXT: s_endpgm +; +; GFX12-LABEL: test_sink_smem_offset_neg400_32bit: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_add_co_i32 s2, s0, 0xfffffe70 +; GFX12-NEXT: s_mov_b32 s3, 0 +; GFX12-NEXT: .LBB6_1: ; %loop +; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX12-NEXT: s_add_co_i32 s1, s1, -1 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: s_cmp_lg_u32 s1, 0 +; GFX12-NEXT: s_cbranch_scc1 .LBB6_1 +; GFX12-NEXT: ; %bb.2: ; %end +; GFX12-NEXT: s_endpgm +entry: + %gep 
= getelementptr i8, ptr addrspace(6) %ptr, i64 -400 + br label %loop + +loop: + %count = phi i32 [ %dec, %loop ], [ %val, %entry ] + %dec = sub i32 %count, 1 + %load = load volatile i32, ptr addrspace(6) %gep + %cond = icmp eq i32 %dec, 0 + br i1 %cond, label %end, label %loop + +end: + ret void +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX678: {{.*}} +; GFX9: {{.*}}