Skip to content

[AMDGPU][SDAG] Test ISD::PTRADD handling in various special cases #145329

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: users/ritter-x2a/06-12-_amdgpu_sdag_handle_isd_ptradd_in_vop3_patterns
Choose a base branch
from

Conversation

ritter-x2a
Copy link
Member

Pre-committing tests to show improvements in a follow-up PR.

Pre-committing tests to show improvements in a follow-up PR.
@llvmbot
Copy link
Member

llvmbot commented Jun 23, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Fabian Ritter (ritter-x2a)

Changes

Pre-committing tests to show improvements in a follow-up PR.


Full diff: https://github.com/llvm/llvm-project/pull/145329.diff

2 Files Affected:

  • (added) llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll (+63)
  • (modified) llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll (+206)
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll
new file mode 100644
index 0000000000000..fab56383ffa8a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=1 < %s | FileCheck --check-prefixes=GFX6,GFX6_PTRADD %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=0 < %s | FileCheck --check-prefixes=GFX6,GFX6_LEGACY %s
+
+; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectMUBUF.
+
+define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+; GFX6_PTRADD-LABEL: v_add_i32:
+; GFX6_PTRADD:       ; %bb.0:
+; GFX6_PTRADD-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX6_PTRADD-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX6_PTRADD-NEXT:    s_mov_b32 s7, 0x100f000
+; GFX6_PTRADD-NEXT:    s_mov_b32 s10, 0
+; GFX6_PTRADD-NEXT:    s_mov_b32 s11, s7
+; GFX6_PTRADD-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6_PTRADD-NEXT:    v_mov_b32_e32 v1, s3
+; GFX6_PTRADD-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
+; GFX6_PTRADD-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; GFX6_PTRADD-NEXT:    s_mov_b32 s8, s10
+; GFX6_PTRADD-NEXT:    s_mov_b32 s9, s10
+; GFX6_PTRADD-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
+; GFX6_PTRADD-NEXT:    s_waitcnt vmcnt(0)
+; GFX6_PTRADD-NEXT:    buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
+; GFX6_PTRADD-NEXT:    s_waitcnt vmcnt(0)
+; GFX6_PTRADD-NEXT:    s_mov_b32 s6, -1
+; GFX6_PTRADD-NEXT:    s_mov_b32 s4, s0
+; GFX6_PTRADD-NEXT:    s_mov_b32 s5, s1
+; GFX6_PTRADD-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; GFX6_PTRADD-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6_PTRADD-NEXT:    s_endpgm
+;
+; GFX6_LEGACY-LABEL: v_add_i32:
+; GFX6_LEGACY:       ; %bb.0:
+; GFX6_LEGACY-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
+; GFX6_LEGACY-NEXT:    s_mov_b32 s7, 0x100f000
+; GFX6_LEGACY-NEXT:    s_mov_b32 s10, 0
+; GFX6_LEGACY-NEXT:    s_mov_b32 s11, s7
+; GFX6_LEGACY-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX6_LEGACY-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6_LEGACY-NEXT:    s_mov_b64 s[8:9], s[2:3]
+; GFX6_LEGACY-NEXT:    v_mov_b32_e32 v1, 0
+; GFX6_LEGACY-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
+; GFX6_LEGACY-NEXT:    s_waitcnt vmcnt(0)
+; GFX6_LEGACY-NEXT:    buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc
+; GFX6_LEGACY-NEXT:    s_waitcnt vmcnt(0)
+; GFX6_LEGACY-NEXT:    s_mov_b32 s6, -1
+; GFX6_LEGACY-NEXT:    s_mov_b32 s4, s0
+; GFX6_LEGACY-NEXT:    s_mov_b32 s5, s1
+; GFX6_LEGACY-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
+; GFX6_LEGACY-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6_LEGACY-NEXT:    s_endpgm
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
+  %b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1
+  %a = load volatile i32, ptr addrspace(1) %gep
+  %b = load volatile i32, ptr addrspace(1) %b_ptr
+  %result = add i32 %a, %b
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX6: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
index 34bb98550de04..0cd920616c515 100644
--- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll
@@ -291,3 +291,209 @@ define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) {
   %gep = getelementptr inbounds i8, ptr %base, i64 %mul
   ret ptr %gep
 }
+
+; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectGlobalSAddr.
+define amdgpu_kernel void @uniform_base_varying_offset_imm(ptr addrspace(1) %p) {
+; GFX942_PTRADD-LABEL: uniform_base_varying_offset_imm:
+; GFX942_PTRADD:       ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_PTRADD-NEXT:    v_mov_b32_e32 v1, 0
+; GFX942_PTRADD-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942_PTRADD-NEXT:    v_mov_b32_e32 v2, 1
+; GFX942_PTRADD-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:    v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1]
+; GFX942_PTRADD-NEXT:    global_store_dword v[0:1], v2, off offset:16
+; GFX942_PTRADD-NEXT:    s_endpgm
+;
+; GFX942_LEGACY-LABEL: uniform_base_varying_offset_imm:
+; GFX942_LEGACY:       ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX942_LEGACY-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX942_LEGACY-NEXT:    v_mov_b32_e32 v1, 1
+; GFX942_LEGACY-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:    global_store_dword v0, v1, s[0:1] offset:16
+; GFX942_LEGACY-NEXT:    s_endpgm
+entry:
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %shift = shl i32 %tid, 2
+  %voffset = zext i32 %shift to i64
+  %gep1 = getelementptr inbounds i8, ptr addrspace(1) %p, i64 %voffset
+  %gep2 = getelementptr inbounds i8, ptr addrspace(1) %gep1, i64 16
+  store i32 1, ptr addrspace(1) %gep2
+  ret void
+}
+
+; Adjusted from global-saddr-load.ll. Tests PTRADD handling in
+; AMDGPUDAGToDAGISel::SelectSMRDBaseOffset.
+define amdgpu_kernel void @global_load_saddr_i32_uniform_offset(ptr addrspace(1) %sbase, i32 %soffset, ptr addrspace(1) %r) {
+; GFX942_PTRADD-LABEL: global_load_saddr_i32_uniform_offset:
+; GFX942_PTRADD:       ; %bb.0:
+; GFX942_PTRADD-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942_PTRADD-NEXT:    s_load_dword s6, s[4:5], 0x8
+; GFX942_PTRADD-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x10
+; GFX942_PTRADD-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942_PTRADD-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:    s_add_u32 s0, s0, s6
+; GFX942_PTRADD-NEXT:    s_addc_u32 s1, s1, 0
+; GFX942_PTRADD-NEXT:    s_load_dword s0, s[0:1], 0x0
+; GFX942_PTRADD-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:    v_mov_b32_e32 v1, s0
+; GFX942_PTRADD-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX942_PTRADD-NEXT:    s_endpgm
+;
+; GFX942_LEGACY-LABEL: global_load_saddr_i32_uniform_offset:
+; GFX942_LEGACY:       ; %bb.0:
+; GFX942_LEGACY-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX942_LEGACY-NEXT:    s_load_dword s6, s[4:5], 0x8
+; GFX942_LEGACY-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x10
+; GFX942_LEGACY-NEXT:    v_mov_b32_e32 v0, 0
+; GFX942_LEGACY-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:    s_load_dword s0, s[0:1], s6 offset:0x0
+; GFX942_LEGACY-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:    v_mov_b32_e32 v1, s0
+; GFX942_LEGACY-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX942_LEGACY-NEXT:    s_endpgm
+  %zext.offset = zext i32 %soffset to i64
+  %gep0 = getelementptr inbounds i8, ptr addrspace(1) %sbase, i64 %zext.offset
+  %load = load i32, ptr addrspace(1) %gep0
+  %to.vgpr = bitcast i32 %load to float
+  store float %to.vgpr, ptr addrspace(1) %r
+  ret void
+}
+
+; Adjusted from llvm.amdgcn.global.load.lds.ll. Tests the offset lowering for
+; Intrinsic::amdgcn_global_load_lds.
+define void @global_load_lds_dword_saddr_and_vaddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr, i32 %voffset) {
+; GFX942_PTRADD-LABEL: global_load_lds_dword_saddr_and_vaddr:
+; GFX942_PTRADD:       ; %bb.0: ; %main_body
+; GFX942_PTRADD-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:    v_mov_b32_e32 v2, v1
+; GFX942_PTRADD-NEXT:    v_mov_b32_e32 v3, 0
+; GFX942_PTRADD-NEXT:    v_lshl_add_u64 v[2:3], s[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX942_PTRADD-NEXT:    s_mov_b32 m0, s0
+; GFX942_PTRADD-NEXT:    s_nop 0
+; GFX942_PTRADD-NEXT:    global_load_lds_dword v[2:3], off offset:48 sc1
+; GFX942_PTRADD-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: global_load_lds_dword_saddr_and_vaddr:
+; GFX942_LEGACY:       ; %bb.0: ; %main_body
+; GFX942_LEGACY-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:    v_readfirstlane_b32 s2, v0
+; GFX942_LEGACY-NEXT:    s_mov_b32 m0, s2
+; GFX942_LEGACY-NEXT:    s_nop 0
+; GFX942_LEGACY-NEXT:    global_load_lds_dword v1, s[0:1] offset:48 sc1
+; GFX942_LEGACY-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:    s_setpc_b64 s[30:31]
+main_body:
+  %voffset.64 = zext i32 %voffset to i64
+  %gep = getelementptr i8, ptr addrspace(1) %gptr, i64 %voffset.64
+  call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gep, ptr addrspace(3) %lptr, i32 4, i32 48, i32 16)
+  ret void
+}
+
+; Taken from shl_add_ptr_global.ll. Tests PTRADD handling in
+; SITargetLowering::performSHLPtrCombine.
+define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr addrspace(1) %extra.use, ptr addrspace(1) %ptr) {
+; GFX942_PTRADD-LABEL: shl_base_global_ptr_global_atomic_fadd:
+; GFX942_PTRADD:       ; %bb.0:
+; GFX942_PTRADD-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:    s_mov_b64 s[0:1], 0x80
+; GFX942_PTRADD-NEXT:    v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
+; GFX942_PTRADD-NEXT:    v_lshlrev_b64 v[4:5], 2, v[0:1]
+; GFX942_PTRADD-NEXT:    v_mov_b32_e32 v6, 0x42c80000
+; GFX942_PTRADD-NEXT:    global_atomic_add_f32 v[4:5], v6, off
+; GFX942_PTRADD-NEXT:    global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
+; GFX942_PTRADD-NEXT:    s_waitcnt vmcnt(0)
+; GFX942_PTRADD-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: shl_base_global_ptr_global_atomic_fadd:
+; GFX942_LEGACY:       ; %bb.0:
+; GFX942_LEGACY-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:    v_lshlrev_b64 v[0:1], 2, v[4:5]
+; GFX942_LEGACY-NEXT:    v_mov_b32_e32 v6, 0x42c80000
+; GFX942_LEGACY-NEXT:    global_atomic_add_f32 v[0:1], v6, off offset:512
+; GFX942_LEGACY-NEXT:    s_mov_b64 s[0:1], 0x80
+; GFX942_LEGACY-NEXT:    v_lshl_add_u64 v[0:1], v[4:5], 0, s[0:1]
+; GFX942_LEGACY-NEXT:    global_store_dwordx2 v[2:3], v[0:1], off sc0 sc1
+; GFX942_LEGACY-NEXT:    s_waitcnt vmcnt(0)
+; GFX942_LEGACY-NEXT:    s_setpc_b64 s[30:31]
+  %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(1) %ptr, i64 0, i64 32
+  %cast = ptrtoint ptr addrspace(1) %arrayidx0 to i64
+  %shl = shl i64 %cast, 2
+  %castback = inttoptr i64 %shl to ptr addrspace(1)
+  %unused = atomicrmw fadd ptr addrspace(1) %castback, float 100.0 syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
+  store volatile i64 %cast, ptr addrspace(1) %extra.use, align 4
+  ret void
+}
+
+; Test PTRADD handling in TargetLowering::SimplifyDemandedBits and
+; TargetLowering::ShrinkDemandedOp.
+define i32 @gep_in_const_as_cast_to_const32_as(ptr addrspace(4) %src, i64 %offset) {
+; GFX942_PTRADD-LABEL: gep_in_const_as_cast_to_const32_as:
+; GFX942_PTRADD:       ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:    v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3]
+; GFX942_PTRADD-NEXT:    s_mov_b32 s1, 0
+; GFX942_PTRADD-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX942_PTRADD-NEXT:    s_load_dword s0, s[0:1], 0x0
+; GFX942_PTRADD-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942_PTRADD-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: gep_in_const_as_cast_to_const32_as:
+; GFX942_LEGACY:       ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:    v_add_u32_e32 v0, v0, v2
+; GFX942_LEGACY-NEXT:    s_mov_b32 s1, 0
+; GFX942_LEGACY-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX942_LEGACY-NEXT:    s_load_dword s0, s[0:1], 0x0
+; GFX942_LEGACY-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942_LEGACY-NEXT:    v_mov_b32_e32 v0, s0
+; GFX942_LEGACY-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %gep = getelementptr i8, ptr addrspace(4) %src, i64 %offset
+  %gep.cast = addrspacecast ptr addrspace(4) %gep to ptr addrspace(6)
+  %l = load i32, ptr addrspace(6) %gep.cast
+  ret i32 %l
+}
+
+@CG = addrspace(4) constant [16 x i32] zeroinitializer, align 4
+
+; Test PTRADD handling in isMemSrcFromConstant.
+define void @replace_const0_memcpy_by_memset(ptr align 4 %dst) {
+; GFX942_PTRADD-LABEL: replace_const0_memcpy_by_memset:
+; GFX942_PTRADD:       ; %bb.0: ; %entry
+; GFX942_PTRADD-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:    s_getpc_b64 s[0:1]
+; GFX942_PTRADD-NEXT:    s_add_u32 s0, s0, CG@gotpcrel32@lo+4
+; GFX942_PTRADD-NEXT:    s_addc_u32 s1, s1, CG@gotpcrel32@hi+12
+; GFX942_PTRADD-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
+; GFX942_PTRADD-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4
+; GFX942_PTRADD-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX942_PTRADD-NEXT:    v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942_PTRADD-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; GFX942_PTRADD-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942_PTRADD-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX942_LEGACY-LABEL: replace_const0_memcpy_by_memset:
+; GFX942_LEGACY:       ; %bb.0: ; %entry
+; GFX942_LEGACY-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:    v_mov_b32_e32 v2, 0
+; GFX942_LEGACY-NEXT:    v_mov_b32_e32 v3, v2
+; GFX942_LEGACY-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; GFX942_LEGACY-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX942_LEGACY-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %gep = getelementptr i8, ptr addrspace(4) @CG, i64 4
+  tail call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 4 %dst, ptr addrspace(4) noundef nonnull align 4 %gep, i64 8, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0.p4.i64(ptr noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg)
+
+!0 = !{}

@ritter-x2a ritter-x2a marked this pull request as ready for review June 23, 2025 13:50
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants