From 27d60942ae454165566366f0cec1fa3f28117ec3 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 18 Feb 2025 11:50:12 -0500 Subject: [PATCH 1/3] [AMDGPU] Change SGPR layout to striped caller/callee saved This PR updates the SGPR layout to a striped caller/callee-saved design, similar to the VGPR layout. The stripe width is set to 8. Fixes #113782. --- llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td | 6 +- .../amdgpu-simplify-libcall-pow-codegen.ll | 290 +- llvm/test/CodeGen/AMDGPU/bf16.ll | 335 +- ...der-no-live-segment-at-def-implicit-def.ll | 42 +- .../branch-folding-implicit-def-subreg.ll | 404 +- .../test/CodeGen/AMDGPU/branch-relax-spill.ll | 213 +- ...l-args-inreg-no-sgpr-for-csrspill-xfail.ll | 4 +- llvm/test/CodeGen/AMDGPU/call-args-inreg.ll | 12 +- .../CodeGen/AMDGPU/call-argument-types.ll | 2557 ++++++------ .../AMDGPU/call-preserved-registers.ll | 34 +- .../test/CodeGen/AMDGPU/callee-frame-setup.ll | 2337 ++++------- .../AMDGPU/csr-sgpr-spill-live-ins.mir | 10 +- llvm/test/CodeGen/AMDGPU/ds_read2.ll | 36 +- .../AMDGPU/dwarf-multi-register-use-crash.ll | 72 +- .../eliminate-frame-index-s-mov-b32.mir | 53 +- .../CodeGen/AMDGPU/function-args-inreg.ll | 4 +- .../CodeGen/AMDGPU/function-resource-usage.ll | 10 +- .../CodeGen/AMDGPU/gfx-call-non-gfx-func.ll | 68 +- .../AMDGPU/gfx-callable-argument-types.ll | 288 +- .../AMDGPU/global_atomics_scan_fadd.ll | 3668 ++++++++--------- .../AMDGPU/global_atomics_scan_fmax.ll | 3108 +++++++------- .../AMDGPU/global_atomics_scan_fmin.ll | 3108 +++++++------- .../AMDGPU/global_atomics_scan_fsub.ll | 3668 ++++++++--------- .../greedy-alloc-fail-sgpr1024-spill.mir | 126 +- .../identical-subrange-spill-infloop.ll | 441 +- llvm/test/CodeGen/AMDGPU/indirect-call.ll | 1240 +++--- llvm/test/CodeGen/AMDGPU/issue48473.mir | 2 +- .../llvm.amdgcn.pops.exiting.wave.id.ll | 48 +- llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll | 45 +- llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll | 81 +- llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll | 45 +- llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll | 81 +- .../lower-work-group-id-intrinsics-hsa.ll | 64 +- ...ne-sink-temporal-divergence-swdev407790.ll | 320 +- .../materialize-frame-index-sgpr.gfx10.ll | 842 +--- .../AMDGPU/materialize-frame-index-sgpr.ll | 1511 ++----- ...-knownbits-assign-crash-gh-issue-110930.ll | 26 +- .../AMDGPU/pei-scavenge-sgpr-carry-out.mir | 86 +- .../CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir | 56 +- .../test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir | 30 +- .../AMDGPU/promote-constOffset-to-imm.ll | 446 +- .../ran-out-of-sgprs-allocation-failure.mir | 206 +- .../AMDGPU/schedule-amdgpu-tracker-physreg.ll | 8 +- llvm/test/CodeGen/AMDGPU/select.f16.ll | 15 +- .../sgpr-spill-update-only-slot-indexes.ll | 16 +- .../AMDGPU/shufflevector.v2i64.v8i64.ll | 2240 +++------- llvm/test/CodeGen/AMDGPU/sibling-call.ll | 240 +- .../AMDGPU/snippet-copy-bundle-regression.mir | 55 +- .../AMDGPU/spill-sgpr-to-virtual-vgpr.mir | 38 +- .../AMDGPU/spill-sgpr-used-for-exec-copy.mir | 11 +- .../spill_more_than_wavesize_csr_sgprs.ll | 396 +- .../CodeGen/AMDGPU/splitkit-copy-bundle.mir | 200 +- ...tack-pointer-offset-relative-frameindex.ll | 22 +- llvm/test/CodeGen/AMDGPU/stack-realign.ll | 20 +- .../AMDGPU/tuple-allocation-failure.ll | 501 ++- .../unallocatable-bundle-regression.mir | 22 +- .../AMDGPU/unstructured-cfg-def-use-issue.ll | 212 +- .../CodeGen/AMDGPU/use_restore_frame_reg.mir | 76 +- .../AMDGPU/vgpr-large-tuple-alloc-error.ll | 352 +- .../CodeGen/MIR/AMDGPU/spill-phys-vgprs.mir | 3 +- 60 files changed, 13283 insertions(+), 17167 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index 80969fce3d77f..e3861a7d06c3d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -91,7 +91,11 @@ def CSR_AMDGPU_AGPRs : CalleeSavedRegs< >; def CSR_AMDGPU_SGPRs : CalleeSavedRegs< - (sequence "SGPR%u", 30, 105) + (add (sequence "SGPR%u", 30, 37), + (sequence "SGPR%u", 46, 53), + (sequence "SGPR%u", 62, 69), + (sequence "SGPR%u", 78, 85), + (sequence "SGPR%u", 94, 105)) >; def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs< diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll index 54b4888120e5f..440e509ec947f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll @@ -124,35 +124,35 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: v_writelane_b32 v43, s35, 3 ; CHECK-NEXT: v_writelane_b32 v43, s36, 4 ; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 +; CHECK-NEXT: v_writelane_b32 v43, s46, 6 +; CHECK-NEXT: v_writelane_b32 v43, s47, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s40, 8 -; CHECK-NEXT: v_writelane_b32 v43, s41, 9 -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: v_writelane_b32 v43, s48, 8 +; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s42, 10 +; CHECK-NEXT: v_writelane_b32 v43, s50, 10 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s43, 11 +; CHECK-NEXT: v_writelane_b32 v43, s51, 11 ; CHECK-NEXT: v_mov_b32_e32 v42, v1 -; CHECK-NEXT: v_writelane_b32 v43, s44, 12 +; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: v_writelane_b32 v43, s45, 13 +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: v_mov_b32_e32 v41, v2 -; CHECK-NEXT: s_mov_b32 s42, s15 -; CHECK-NEXT: s_mov_b32 s43, s14 -; CHECK-NEXT: s_mov_b32 s44, s13 -; CHECK-NEXT: s_mov_b32 s45, s12 +; CHECK-NEXT: s_mov_b32 s50, s15 +; CHECK-NEXT: s_mov_b32 s51, s14 +; CHECK-NEXT: s_mov_b32 s52, s13 +; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] +; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41 @@ -160,15 +160,15 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s45 -; CHECK-NEXT: s_mov_b32 s13, s44 -; CHECK-NEXT: s_mov_b32 s14, s43 -; CHECK-NEXT: s_mov_b32 s15, s42 +; CHECK-NEXT: s_mov_b32 s12, s53 +; CHECK-NEXT: s_mov_b32 s13, s52 +; CHECK-NEXT: s_mov_b32 s14, s51 +; CHECK-NEXT: s_mov_b32 s15, s50 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -178,14 +178,14 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s45, v43, 13 -; CHECK-NEXT: v_readlane_b32 s44, v43, 12 -; CHECK-NEXT: v_readlane_b32 s43, v43, 11 -; CHECK-NEXT: v_readlane_b32 s42, v43, 10 -; CHECK-NEXT: v_readlane_b32 s41, v43, 9 -; CHECK-NEXT: v_readlane_b32 s40, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 +; CHECK-NEXT: v_readlane_b32 s53, v43, 13 +; CHECK-NEXT: v_readlane_b32 s52, v43, 12 +; CHECK-NEXT: v_readlane_b32 s51, v43, 11 +; CHECK-NEXT: v_readlane_b32 s50, v43, 10 +; CHECK-NEXT: v_readlane_b32 s49, v43, 9 +; CHECK-NEXT: v_readlane_b32 s48, v43, 8 +; CHECK-NEXT: v_readlane_b32 s47, v43, 7 +; CHECK-NEXT: v_readlane_b32 s46, v43, 6 ; CHECK-NEXT: v_readlane_b32 s37, v43, 5 ; CHECK-NEXT: v_readlane_b32 s36, v43, 4 ; CHECK-NEXT: v_readlane_b32 s35, v43, 3 @@ -264,34 +264,34 @@ define double @test_powr_fast_f64(double %x, double %y) { ; CHECK-NEXT: v_writelane_b32 v43, s35, 3 ; CHECK-NEXT: v_writelane_b32 v43, s36, 4 ; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 +; CHECK-NEXT: v_writelane_b32 v43, s46, 6 +; CHECK-NEXT: v_writelane_b32 v43, s47, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s40, 8 -; CHECK-NEXT: v_writelane_b32 v43, s41, 9 -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: v_writelane_b32 v43, s48, 8 +; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s42, 10 -; CHECK-NEXT: v_writelane_b32 v43, s43, 11 -; CHECK-NEXT: v_writelane_b32 v43, s44, 12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: v_writelane_b32 v43, s50, 10 +; CHECK-NEXT: v_writelane_b32 v43, s51, 11 +; CHECK-NEXT: v_writelane_b32 v43, s52, 12 +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s45, 13 +; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v42, v31 ; CHECK-NEXT: v_mov_b32_e32 v41, v3 ; CHECK-NEXT: v_mov_b32_e32 v40, v2 -; CHECK-NEXT: s_mov_b32 s42, s15 -; CHECK-NEXT: s_mov_b32 s43, s14 -; CHECK-NEXT: s_mov_b32 s44, s13 -; CHECK-NEXT: s_mov_b32 s45, s12 +; CHECK-NEXT: s_mov_b32 s50, s15 +; CHECK-NEXT: s_mov_b32 s51, s14 +; CHECK-NEXT: s_mov_b32 s52, s13 +; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] +; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mul_f64 v[0:1], v[40:41], v[0:1] @@ -299,28 +299,28 @@ define double @test_powr_fast_f64(double %x, double %y) { ; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s45 -; CHECK-NEXT: s_mov_b32 s13, s44 -; CHECK-NEXT: s_mov_b32 s14, s43 -; CHECK-NEXT: s_mov_b32 s15, s42 +; CHECK-NEXT: s_mov_b32 s12, s53 +; CHECK-NEXT: s_mov_b32 s13, s52 +; CHECK-NEXT: s_mov_b32 s14, s51 +; CHECK-NEXT: s_mov_b32 s15, s50 ; CHECK-NEXT: v_mov_b32_e32 v31, v42 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s45, v43, 13 -; CHECK-NEXT: v_readlane_b32 s44, v43, 12 -; CHECK-NEXT: v_readlane_b32 s43, v43, 11 -; CHECK-NEXT: v_readlane_b32 s42, v43, 10 -; CHECK-NEXT: v_readlane_b32 s41, v43, 9 -; CHECK-NEXT: v_readlane_b32 s40, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 +; CHECK-NEXT: v_readlane_b32 s53, v43, 13 +; CHECK-NEXT: v_readlane_b32 s52, v43, 12 +; CHECK-NEXT: v_readlane_b32 s51, v43, 11 +; CHECK-NEXT: v_readlane_b32 s50, v43, 10 +; CHECK-NEXT: v_readlane_b32 s49, v43, 9 +; CHECK-NEXT: v_readlane_b32 s48, v43, 8 +; CHECK-NEXT: v_readlane_b32 s47, v43, 7 +; CHECK-NEXT: v_readlane_b32 s46, v43, 6 ; CHECK-NEXT: v_readlane_b32 s37, v43, 5 ; CHECK-NEXT: v_readlane_b32 s36, v43, 4 ; CHECK-NEXT: v_readlane_b32 s35, v43, 3 @@ -406,35 +406,35 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: v_writelane_b32 v43, s35, 3 ; CHECK-NEXT: v_writelane_b32 v43, s36, 4 ; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 +; CHECK-NEXT: v_writelane_b32 v43, s46, 6 +; CHECK-NEXT: v_writelane_b32 v43, s47, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s40, 8 -; CHECK-NEXT: v_writelane_b32 v43, s41, 9 -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: v_writelane_b32 v43, s48, 8 +; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s42, 10 +; CHECK-NEXT: v_writelane_b32 v43, s50, 10 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s43, 11 +; CHECK-NEXT: v_writelane_b32 v43, s51, 11 ; CHECK-NEXT: v_mov_b32_e32 v42, v1 -; CHECK-NEXT: v_writelane_b32 v43, s44, 12 +; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: v_writelane_b32 v43, s45, 13 +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: v_mov_b32_e32 v41, v2 -; CHECK-NEXT: s_mov_b32 s42, s15 -; CHECK-NEXT: s_mov_b32 s43, s14 -; CHECK-NEXT: s_mov_b32 s44, s13 -; CHECK-NEXT: s_mov_b32 s45, s12 +; CHECK-NEXT: s_mov_b32 s50, s15 +; CHECK-NEXT: s_mov_b32 s51, s14 +; CHECK-NEXT: s_mov_b32 s52, s13 +; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] +; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41 @@ -442,15 +442,15 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s45 -; CHECK-NEXT: s_mov_b32 s13, s44 -; CHECK-NEXT: s_mov_b32 s14, s43 -; CHECK-NEXT: s_mov_b32 s15, s42 +; CHECK-NEXT: s_mov_b32 s12, s53 +; CHECK-NEXT: s_mov_b32 s13, s52 +; CHECK-NEXT: s_mov_b32 s14, s51 +; CHECK-NEXT: s_mov_b32 s15, s50 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -460,14 +460,14 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s45, v43, 13 -; CHECK-NEXT: v_readlane_b32 s44, v43, 12 -; CHECK-NEXT: v_readlane_b32 s43, v43, 11 -; CHECK-NEXT: v_readlane_b32 s42, v43, 10 -; CHECK-NEXT: v_readlane_b32 s41, v43, 9 -; CHECK-NEXT: v_readlane_b32 s40, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 +; CHECK-NEXT: v_readlane_b32 s53, v43, 13 +; CHECK-NEXT: v_readlane_b32 s52, v43, 12 +; CHECK-NEXT: v_readlane_b32 s51, v43, 11 +; CHECK-NEXT: v_readlane_b32 s50, v43, 10 +; CHECK-NEXT: v_readlane_b32 s49, v43, 9 +; CHECK-NEXT: v_readlane_b32 s48, v43, 8 +; CHECK-NEXT: v_readlane_b32 s47, v43, 7 +; CHECK-NEXT: v_readlane_b32 s46, v43, 6 ; CHECK-NEXT: v_readlane_b32 s37, v43, 5 ; CHECK-NEXT: v_readlane_b32 s36, v43, 4 ; CHECK-NEXT: v_readlane_b32 s35, v43, 3 @@ -548,32 +548,32 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { ; CHECK-NEXT: v_writelane_b32 v42, s35, 3 ; CHECK-NEXT: v_writelane_b32 v42, s36, 4 ; CHECK-NEXT: v_writelane_b32 v42, s37, 5 -; CHECK-NEXT: v_writelane_b32 v42, s38, 6 -; CHECK-NEXT: v_writelane_b32 v42, s39, 7 +; CHECK-NEXT: v_writelane_b32 v42, s46, 6 +; CHECK-NEXT: v_writelane_b32 v42, s47, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v42, s40, 8 -; CHECK-NEXT: v_writelane_b32 v42, s41, 9 -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: v_writelane_b32 v42, s48, 8 +; CHECK-NEXT: v_writelane_b32 v42, s49, 9 +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v42, s42, 10 -; CHECK-NEXT: v_writelane_b32 v42, s43, 11 -; CHECK-NEXT: v_writelane_b32 v42, s44, 12 +; CHECK-NEXT: v_writelane_b32 v42, s50, 10 +; CHECK-NEXT: v_writelane_b32 v42, s51, 11 +; CHECK-NEXT: v_writelane_b32 v42, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v42, s45, 13 +; CHECK-NEXT: v_writelane_b32 v42, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 -; CHECK-NEXT: s_mov_b32 s42, s15 -; CHECK-NEXT: s_mov_b32 s43, s14 -; CHECK-NEXT: s_mov_b32 s44, s13 -; CHECK-NEXT: s_mov_b32 s45, s12 +; CHECK-NEXT: s_mov_b32 s50, s15 +; CHECK-NEXT: s_mov_b32 s51, s14 +; CHECK-NEXT: s_mov_b32 s52, s13 +; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] +; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] ; CHECK-NEXT: v_lshlrev_b32_e32 v41, 1, v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -582,28 +582,28 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { ; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s45 -; CHECK-NEXT: s_mov_b32 s13, s44 -; CHECK-NEXT: s_mov_b32 s14, s43 -; CHECK-NEXT: s_mov_b32 s15, s42 +; CHECK-NEXT: s_mov_b32 s12, s53 +; CHECK-NEXT: s_mov_b32 s13, s52 +; CHECK-NEXT: s_mov_b32 s14, s51 +; CHECK-NEXT: s_mov_b32 s15, s50 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s45, v42, 13 -; CHECK-NEXT: v_readlane_b32 s44, v42, 12 -; CHECK-NEXT: v_readlane_b32 s43, v42, 11 -; CHECK-NEXT: v_readlane_b32 s42, v42, 10 -; CHECK-NEXT: v_readlane_b32 s41, v42, 9 -; CHECK-NEXT: v_readlane_b32 s40, v42, 8 -; CHECK-NEXT: v_readlane_b32 s39, v42, 7 -; CHECK-NEXT: v_readlane_b32 s38, v42, 6 +; CHECK-NEXT: v_readlane_b32 s53, v42, 13 +; CHECK-NEXT: v_readlane_b32 s52, v42, 12 +; CHECK-NEXT: v_readlane_b32 s51, v42, 11 +; CHECK-NEXT: v_readlane_b32 s50, v42, 10 +; CHECK-NEXT: v_readlane_b32 s49, v42, 9 +; CHECK-NEXT: v_readlane_b32 s48, v42, 8 +; CHECK-NEXT: v_readlane_b32 s47, v42, 7 +; CHECK-NEXT: v_readlane_b32 s46, v42, 6 ; CHECK-NEXT: v_readlane_b32 s37, v42, 5 ; CHECK-NEXT: v_readlane_b32 s36, v42, 4 ; CHECK-NEXT: v_readlane_b32 s35, v42, 3 @@ -689,34 +689,34 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ; CHECK-NEXT: v_writelane_b32 v43, s35, 3 ; CHECK-NEXT: v_writelane_b32 v43, s36, 4 ; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 +; CHECK-NEXT: v_writelane_b32 v43, s46, 6 +; CHECK-NEXT: v_writelane_b32 v43, s47, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s40, 8 -; CHECK-NEXT: v_writelane_b32 v43, s41, 9 -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: v_writelane_b32 v43, s48, 8 +; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s42, 10 +; CHECK-NEXT: v_writelane_b32 v43, s50, 10 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s43, 11 +; CHECK-NEXT: v_writelane_b32 v43, s51, 11 ; CHECK-NEXT: v_mov_b32_e32 v41, v1 -; CHECK-NEXT: v_writelane_b32 v43, s44, 12 +; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v41 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: v_writelane_b32 v43, s45, 13 +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 -; CHECK-NEXT: s_mov_b32 s42, s15 -; CHECK-NEXT: s_mov_b32 s43, s14 -; CHECK-NEXT: s_mov_b32 s44, s13 -; CHECK-NEXT: s_mov_b32 s45, s12 +; CHECK-NEXT: s_mov_b32 s50, s15 +; CHECK-NEXT: s_mov_b32 s51, s14 +; CHECK-NEXT: s_mov_b32 s52, s13 +; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] +; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] ; CHECK-NEXT: v_or_b32_e32 v42, 1, v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -725,15 +725,15 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s45 -; CHECK-NEXT: s_mov_b32 s13, s44 -; CHECK-NEXT: s_mov_b32 s14, s43 -; CHECK-NEXT: s_mov_b32 s15, s42 +; CHECK-NEXT: s_mov_b32 s12, s53 +; CHECK-NEXT: s_mov_b32 s13, s52 +; CHECK-NEXT: s_mov_b32 s14, s51 +; CHECK-NEXT: s_mov_b32 s15, s50 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -742,14 +742,14 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s45, v43, 13 -; CHECK-NEXT: v_readlane_b32 s44, v43, 12 -; CHECK-NEXT: v_readlane_b32 s43, v43, 11 -; CHECK-NEXT: v_readlane_b32 s42, v43, 10 -; CHECK-NEXT: v_readlane_b32 s41, v43, 9 -; CHECK-NEXT: v_readlane_b32 s40, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 +; CHECK-NEXT: v_readlane_b32 s53, v43, 13 +; CHECK-NEXT: v_readlane_b32 s52, v43, 12 +; CHECK-NEXT: v_readlane_b32 s51, v43, 11 +; CHECK-NEXT: v_readlane_b32 s50, v43, 10 +; CHECK-NEXT: v_readlane_b32 s49, v43, 9 +; CHECK-NEXT: v_readlane_b32 s48, v43, 8 +; CHECK-NEXT: v_readlane_b32 s47, v43, 7 +; CHECK-NEXT: v_readlane_b32 s46, v43, 6 ; CHECK-NEXT: v_readlane_b32 s37, v43, 5 ; CHECK-NEXT: v_readlane_b32 s36, v43, 4 ; CHECK-NEXT: v_readlane_b32 s35, v43, 3 diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index aafdb1c8cc36f..10d6d09441ee5 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -40671,14 +40671,6 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x ; GCN-LABEL: v_vselect_v16bf16: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_waitcnt expcnt(0) -; GCN-NEXT: v_writelane_b32 v31, s30, 0 -; GCN-NEXT: v_writelane_b32 v31, s31, 1 -; GCN-NEXT: v_writelane_b32 v31, s34, 2 -; GCN-NEXT: v_writelane_b32 v31, s35, 3 ; GCN-NEXT: v_and_b32_e32 v0, 1, v0 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GCN-NEXT: v_and_b32_e32 v0, 1, v1 @@ -40716,21 +40708,21 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x ; GCN-NEXT: v_and_b32_e32 v8, 1, v14 ; GCN-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v7 ; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 -; GCN-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v8 +; GCN-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v8 ; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:64 ; GCN-NEXT: v_and_b32_e32 v9, 1, v15 -; GCN-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v9 +; GCN-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v9 ; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:60 ; GCN-NEXT: s_waitcnt vmcnt(2) ; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8 -; GCN-NEXT: v_cndmask_b32_e64 v15, v8, v7, s[34:35] +; GCN-NEXT: v_cndmask_b32_e64 v15, v8, v7, s[40:41] ; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:56 ; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v30 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9 -; GCN-NEXT: v_cndmask_b32_e64 v14, v9, v8, s[30:31] +; GCN-NEXT: v_cndmask_b32_e64 v14, v9, v8, s[38:39] ; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:52 ; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v29 ; GCN-NEXT: s_waitcnt vmcnt(1) @@ -40806,14 +40798,6 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x ; GCN-NEXT: v_and_b32_e32 v13, 0xffff0000, v13 ; GCN-NEXT: v_and_b32_e32 v14, 0xffff0000, v14 ; GCN-NEXT: v_and_b32_e32 v15, 0xffff0000, v15 -; GCN-NEXT: v_readlane_b32 s35, v31, 3 -; GCN-NEXT: v_readlane_b32 s34, v31, 2 -; GCN-NEXT: v_readlane_b32 s31, v31, 1 -; GCN-NEXT: v_readlane_b32 s30, v31, 0 -; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: v_vselect_v16bf16: @@ -40954,9 +40938,6 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x ; GFX8-LABEL: v_vselect_v16bf16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v1 @@ -40982,17 +40963,13 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x ; GFX8-NEXT: v_and_b32_e32 v0, 1, v11 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[24:25], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v12 -; GFX8-NEXT: v_writelane_b32 v31, s30, 0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[26:27], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v13 -; GFX8-NEXT: v_writelane_b32 v31, s31, 1 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v14 -; GFX8-NEXT: v_writelane_b32 v31, s34, 2 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v15 -; GFX8-NEXT: v_writelane_b32 v31, s35, 3 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v22 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v30 ; GFX8-NEXT: v_cndmask_b32_e64 v6, v1, v0, s[28:29] @@ -41018,9 +40995,9 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x ; GFX8-NEXT: v_or_b32_sdwa v4, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v5, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cndmask_b32_e64 v10, v0, v23, s[30:31] +; GFX8-NEXT: v_cndmask_b32_e64 v10, v0, v23, s[38:39] ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v11, v0, v1, s[34:35] +; GFX8-NEXT: v_cndmask_b32_e64 v11, v0, v1, s[40:41] ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v19 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v27 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v1, v0, s[16:17] @@ -41043,14 +41020,6 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x ; GFX8-NEXT: v_or_b32_sdwa v2, v13, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v3, v12, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v7, v10, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-NEXT: v_readlane_b32 s35, v31, 3 -; GFX8-NEXT: v_readlane_b32 s34, v31, 2 -; GFX8-NEXT: v_readlane_b32 s31, v31, 1 -; GFX8-NEXT: v_readlane_b32 s30, v31, 0 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_vselect_v16bf16: @@ -42030,108 +41999,78 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v34, s30, 0 -; GFX8-NEXT: v_writelane_b32 v34, s31, 1 -; GFX8-NEXT: v_writelane_b32 v34, s34, 2 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX8-NEXT: v_writelane_b32 v34, s35, 3 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v1 -; GFX8-NEXT: v_writelane_b32 v34, s36, 4 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v2 -; GFX8-NEXT: v_writelane_b32 v34, s37, 5 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v3 -; GFX8-NEXT: v_writelane_b32 v34, s38, 6 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[8:9], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v4 -; GFX8-NEXT: v_writelane_b32 v34, s39, 7 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[10:11], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v5 -; GFX8-NEXT: v_writelane_b32 v34, s40, 8 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[12:13], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v6 -; GFX8-NEXT: v_writelane_b32 v34, s41, 9 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[14:15], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v7 -; GFX8-NEXT: v_writelane_b32 v34, s42, 10 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[16:17], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v8 -; GFX8-NEXT: v_writelane_b32 v34, s43, 11 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[18:19], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v9 -; GFX8-NEXT: v_writelane_b32 v34, s44, 12 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[20:21], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v10 -; GFX8-NEXT: v_writelane_b32 v34, s45, 13 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[22:23], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v11 -; GFX8-NEXT: v_writelane_b32 v34, s46, 14 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[24:25], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v12 -; GFX8-NEXT: v_writelane_b32 v34, s47, 15 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[26:27], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v13 -; GFX8-NEXT: v_writelane_b32 v34, s48, 16 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v14 -; GFX8-NEXT: v_writelane_b32 v34, s49, 17 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v15 -; GFX8-NEXT: v_writelane_b32 v34, s50, 18 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v16 -; GFX8-NEXT: v_writelane_b32 v34, s51, 19 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v17 -; GFX8-NEXT: v_writelane_b32 v34, s52, 20 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v18 -; GFX8-NEXT: v_writelane_b32 v34, s53, 21 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[54:55], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v19 -; GFX8-NEXT: v_writelane_b32 v34, s54, 22 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v20 -; GFX8-NEXT: v_writelane_b32 v34, s55, 23 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v21 -; GFX8-NEXT: v_writelane_b32 v34, s56, 24 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[46:47], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v22 -; GFX8-NEXT: v_writelane_b32 v34, s57, 25 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[48:49], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[70:71], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v23 -; GFX8-NEXT: v_writelane_b32 v34, s58, 26 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[50:51], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[72:73], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v24 -; GFX8-NEXT: v_writelane_b32 v34, s59, 27 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[52:53], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[74:75], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v25 -; GFX8-NEXT: v_writelane_b32 v34, s60, 28 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[54:55], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[76:77], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v26 -; GFX8-NEXT: v_writelane_b32 v34, s61, 29 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[86:87], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v27 -; GFX8-NEXT: v_writelane_b32 v34, s62, 30 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0 +; GFX8-NEXT: v_writelane_b32 v34, s30, 0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v28 -; GFX8-NEXT: v_writelane_b32 v34, s63, 31 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0 +; GFX8-NEXT: v_writelane_b32 v34, s31, 1 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v29 -; GFX8-NEXT: v_writelane_b32 v34, s64, 32 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[62:63], 1, v0 +; GFX8-NEXT: v_writelane_b32 v34, s34, 2 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v30 -; GFX8-NEXT: v_writelane_b32 v34, s65, 33 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[64:65], 1, v0 +; GFX8-NEXT: v_writelane_b32 v34, s35, 3 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0 ; GFX8-NEXT: buffer_load_ushort v0, off, s[0:3], s32 -; GFX8-NEXT: v_writelane_b32 v34, s66, 34 -; GFX8-NEXT: v_writelane_b32 v34, s67, 35 +; GFX8-NEXT: v_writelane_b32 v34, s36, 4 +; GFX8-NEXT: v_writelane_b32 v34, s37, 5 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[66:67], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0 ; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; GFX8-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 @@ -42168,40 +42107,40 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v29 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_lshrrev_b32_e32 v28, 16, v32 -; GFX8-NEXT: v_cndmask_b32_e64 v28, v33, v28, s[66:67] -; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[64:65] +; GFX8-NEXT: v_cndmask_b32_e64 v28, v33, v28, s[36:37] +; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[34:35] ; GFX8-NEXT: v_lshrrev_b32_e32 v32, 16, v31 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v30 -; GFX8-NEXT: v_cndmask_b32_e64 v32, v33, v32, s[62:63] -; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, v31, s[60:61] +; GFX8-NEXT: v_cndmask_b32_e64 v32, v33, v32, s[30:31] +; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, v31, s[90:91] ; GFX8-NEXT: v_lshrrev_b32_e32 v31, 16, v27 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v26 -; GFX8-NEXT: v_cndmask_b32_e64 v31, v33, v31, s[58:59] -; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[56:57] +; GFX8-NEXT: v_cndmask_b32_e64 v31, v33, v31, s[88:89] +; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[86:87] ; GFX8-NEXT: v_lshrrev_b32_e32 v27, 16, v25 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v24 -; GFX8-NEXT: v_cndmask_b32_e64 v27, v33, v27, s[54:55] -; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[52:53] +; GFX8-NEXT: v_cndmask_b32_e64 v27, v33, v27, s[76:77] +; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[74:75] ; GFX8-NEXT: v_lshrrev_b32_e32 v25, 16, v23 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v22 -; GFX8-NEXT: v_cndmask_b32_e64 v25, v33, v25, s[50:51] -; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[48:49] +; GFX8-NEXT: v_cndmask_b32_e64 v25, v33, v25, s[72:73] +; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[70:71] ; GFX8-NEXT: v_lshrrev_b32_e32 v23, 16, v21 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v20 -; GFX8-NEXT: v_cndmask_b32_e64 v23, v33, v23, s[46:47] -; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[44:45] +; GFX8-NEXT: v_cndmask_b32_e64 v23, v33, v23, s[60:61] +; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[58:59] ; GFX8-NEXT: v_lshrrev_b32_e32 v21, 16, v19 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v18 -; GFX8-NEXT: v_cndmask_b32_e64 v21, v33, v21, s[42:43] -; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v21, v33, v21, s[56:57] +; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[54:55] ; GFX8-NEXT: v_lshrrev_b32_e32 v19, 16, v17 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v16 -; GFX8-NEXT: v_cndmask_b32_e64 v19, v33, v19, s[38:39] -; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[36:37] +; GFX8-NEXT: v_cndmask_b32_e64 v19, v33, v19, s[44:45] +; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[42:43] ; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v15 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v14 -; GFX8-NEXT: v_cndmask_b32_e64 v17, v33, v17, s[34:35] -; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[30:31] +; GFX8-NEXT: v_cndmask_b32_e64 v17, v33, v17, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[38:39] ; GFX8-NEXT: v_lshrrev_b32_e32 v15, 16, v13 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v12 ; GFX8-NEXT: v_cndmask_b32_e64 v15, v33, v15, s[28:29] @@ -42262,36 +42201,6 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: v_or_b32_sdwa v13, v26, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v14, v30, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v15, v29, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-NEXT: v_readlane_b32 s67, v34, 35 -; GFX8-NEXT: v_readlane_b32 s66, v34, 34 -; GFX8-NEXT: v_readlane_b32 s65, v34, 33 -; GFX8-NEXT: v_readlane_b32 s64, v34, 32 -; GFX8-NEXT: v_readlane_b32 s63, v34, 31 -; GFX8-NEXT: v_readlane_b32 s62, v34, 30 -; GFX8-NEXT: v_readlane_b32 s61, v34, 29 -; GFX8-NEXT: v_readlane_b32 s60, v34, 28 -; GFX8-NEXT: v_readlane_b32 s59, v34, 27 -; GFX8-NEXT: v_readlane_b32 s58, v34, 26 -; GFX8-NEXT: v_readlane_b32 s57, v34, 25 -; GFX8-NEXT: v_readlane_b32 s56, v34, 24 -; GFX8-NEXT: v_readlane_b32 s55, v34, 23 -; GFX8-NEXT: v_readlane_b32 s54, v34, 22 -; GFX8-NEXT: v_readlane_b32 s53, v34, 21 -; GFX8-NEXT: v_readlane_b32 s52, v34, 20 -; GFX8-NEXT: v_readlane_b32 s51, v34, 19 -; GFX8-NEXT: v_readlane_b32 s50, v34, 18 -; GFX8-NEXT: v_readlane_b32 s49, v34, 17 -; GFX8-NEXT: v_readlane_b32 s48, v34, 16 -; GFX8-NEXT: v_readlane_b32 s47, v34, 15 -; GFX8-NEXT: v_readlane_b32 s46, v34, 14 -; GFX8-NEXT: v_readlane_b32 s45, v34, 13 -; GFX8-NEXT: v_readlane_b32 s44, v34, 12 -; GFX8-NEXT: v_readlane_b32 s43, v34, 11 -; GFX8-NEXT: v_readlane_b32 s42, v34, 10 -; GFX8-NEXT: v_readlane_b32 s41, v34, 9 -; GFX8-NEXT: v_readlane_b32 s40, v34, 8 -; GFX8-NEXT: v_readlane_b32 s39, v34, 7 -; GFX8-NEXT: v_readlane_b32 s38, v34, 6 ; GFX8-NEXT: v_readlane_b32 s37, v34, 5 ; GFX8-NEXT: v_readlane_b32 s36, v34, 4 ; GFX8-NEXT: v_readlane_b32 s35, v34, 3 @@ -42310,108 +42219,76 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v33, s30, 0 -; GFX9-NEXT: v_writelane_b32 v33, s31, 1 -; GFX9-NEXT: v_writelane_b32 v33, s34, 2 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX9-NEXT: v_writelane_b32 v33, s35, 3 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v3 -; GFX9-NEXT: v_writelane_b32 v33, s36, 4 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v2 -; GFX9-NEXT: v_writelane_b32 v33, s37, 5 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[8:9], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v5 -; GFX9-NEXT: v_writelane_b32 v33, s38, 6 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[10:11], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v4 -; GFX9-NEXT: v_writelane_b32 v33, s39, 7 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[12:13], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v7 -; GFX9-NEXT: v_writelane_b32 v33, s40, 8 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[14:15], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v6 -; GFX9-NEXT: v_writelane_b32 v33, s41, 9 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[16:17], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v9 -; GFX9-NEXT: v_writelane_b32 v33, s42, 10 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[18:19], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v8 -; GFX9-NEXT: v_writelane_b32 v33, s43, 11 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[20:21], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v11 -; GFX9-NEXT: v_writelane_b32 v33, s44, 12 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[22:23], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v10 -; GFX9-NEXT: v_writelane_b32 v33, s45, 13 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[24:25], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v13 -; GFX9-NEXT: v_writelane_b32 v33, s46, 14 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[26:27], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v12 -; GFX9-NEXT: v_writelane_b32 v33, s47, 15 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v15 -; GFX9-NEXT: v_writelane_b32 v33, s48, 16 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v14 -; GFX9-NEXT: v_writelane_b32 v33, s49, 17 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v17 -; GFX9-NEXT: v_writelane_b32 v33, s50, 18 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v16 -; GFX9-NEXT: v_writelane_b32 v33, s51, 19 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v19 -; GFX9-NEXT: v_writelane_b32 v33, s52, 20 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[54:55], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v18 -; GFX9-NEXT: v_writelane_b32 v33, s53, 21 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v21 -; GFX9-NEXT: v_writelane_b32 v33, s54, 22 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v20 -; GFX9-NEXT: v_writelane_b32 v33, s55, 23 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[46:47], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v23 -; GFX9-NEXT: v_writelane_b32 v33, s56, 24 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[48:49], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[70:71], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v22 -; GFX9-NEXT: v_writelane_b32 v33, s57, 25 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[50:51], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[72:73], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v25 -; GFX9-NEXT: v_writelane_b32 v33, s58, 26 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[52:53], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[74:75], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v24 -; GFX9-NEXT: v_writelane_b32 v33, s59, 27 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[54:55], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[76:77], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v27 -; GFX9-NEXT: v_writelane_b32 v33, s60, 28 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[86:87], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v26 -; GFX9-NEXT: v_writelane_b32 v33, s61, 29 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v29 -; GFX9-NEXT: v_writelane_b32 v33, s62, 30 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v28 -; GFX9-NEXT: v_writelane_b32 v33, s63, 31 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[62:63], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[92:93], 1, v0 ; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], s32 -; GFX9-NEXT: v_writelane_b32 v33, s64, 32 -; GFX9-NEXT: v_writelane_b32 v33, s65, 33 -; GFX9-NEXT: v_writelane_b32 v33, s66, 34 +; GFX9-NEXT: v_writelane_b32 v33, s30, 0 +; GFX9-NEXT: v_writelane_b32 v33, s31, 1 +; GFX9-NEXT: v_writelane_b32 v33, s34, 2 ; GFX9-NEXT: v_and_b32_e32 v1, 1, v1 -; GFX9-NEXT: v_writelane_b32 v33, s67, 35 +; GFX9-NEXT: v_writelane_b32 v33, s35, 3 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[64:65], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v30 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[66:67], 1, v0 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 @@ -42445,42 +42322,42 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:128 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:64 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_cndmask_b32_e64 v29, v31, v32, s[66:67] +; GFX9-NEXT: v_cndmask_b32_e64 v29, v31, v32, s[34:35] ; GFX9-NEXT: v_lshrrev_b32_e32 v32, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v31, 16, v31 -; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[64:65] -; GFX9-NEXT: v_cndmask_b32_e64 v32, v28, v30, s[62:63] +; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[30:31] +; GFX9-NEXT: v_cndmask_b32_e64 v32, v28, v30, s[92:93] ; GFX9-NEXT: v_lshrrev_b32_e32 v30, 16, v30 ; GFX9-NEXT: v_lshrrev_b32_e32 v28, 16, v28 -; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, v30, s[60:61] -; GFX9-NEXT: v_cndmask_b32_e64 v30, v26, v27, s[58:59] +; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, v30, s[90:91] +; GFX9-NEXT: v_cndmask_b32_e64 v30, v26, v27, s[88:89] ; GFX9-NEXT: v_lshrrev_b32_e32 v27, 16, v27 ; GFX9-NEXT: v_lshrrev_b32_e32 v26, 16, v26 -; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[56:57] -; GFX9-NEXT: v_cndmask_b32_e64 v27, v24, v25, s[54:55] +; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[86:87] +; GFX9-NEXT: v_cndmask_b32_e64 v27, v24, v25, s[76:77] ; GFX9-NEXT: v_lshrrev_b32_e32 v25, 16, v25 ; GFX9-NEXT: v_lshrrev_b32_e32 v24, 16, v24 -; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[52:53] -; GFX9-NEXT: v_cndmask_b32_e64 v25, v22, v23, s[50:51] +; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[74:75] +; GFX9-NEXT: v_cndmask_b32_e64 v25, v22, v23, s[72:73] ; GFX9-NEXT: v_lshrrev_b32_e32 v23, 16, v23 ; GFX9-NEXT: v_lshrrev_b32_e32 v22, 16, v22 -; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[48:49] -; GFX9-NEXT: v_cndmask_b32_e64 v23, v20, v21, s[46:47] +; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[70:71] +; GFX9-NEXT: v_cndmask_b32_e64 v23, v20, v21, s[60:61] ; GFX9-NEXT: v_lshrrev_b32_e32 v21, 16, v21 ; GFX9-NEXT: v_lshrrev_b32_e32 v20, 16, v20 -; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[44:45] -; GFX9-NEXT: v_cndmask_b32_e64 v21, v18, v19, s[42:43] +; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[58:59] +; GFX9-NEXT: v_cndmask_b32_e64 v21, v18, v19, s[56:57] ; GFX9-NEXT: v_lshrrev_b32_e32 v19, 16, v19 ; GFX9-NEXT: v_lshrrev_b32_e32 v18, 16, v18 -; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[40:41] -; GFX9-NEXT: v_cndmask_b32_e64 v19, v16, v17, s[38:39] +; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[54:55] +; GFX9-NEXT: v_cndmask_b32_e64 v19, v16, v17, s[44:45] ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v17 ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v16 -; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[36:37] -; GFX9-NEXT: v_cndmask_b32_e64 v17, v14, v15, s[34:35] +; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[42:43] +; GFX9-NEXT: v_cndmask_b32_e64 v17, v14, v15, s[40:41] ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v15 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v14 -; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[30:31] +; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[38:39] ; GFX9-NEXT: v_cndmask_b32_e64 v15, v12, v13, s[28:29] ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 16, v13 ; GFX9-NEXT: v_lshrrev_b32_e32 v12, 16, v12 @@ -42526,38 +42403,6 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX9-NEXT: v_perm_b32 v13, v26, v30, s4 ; GFX9-NEXT: v_perm_b32 v14, v28, v32, s4 ; GFX9-NEXT: v_perm_b32 v15, v31, v29, s4 -; GFX9-NEXT: v_readlane_b32 s67, v33, 35 -; GFX9-NEXT: v_readlane_b32 s66, v33, 34 -; GFX9-NEXT: v_readlane_b32 s65, v33, 33 -; GFX9-NEXT: v_readlane_b32 s64, v33, 32 -; GFX9-NEXT: v_readlane_b32 s63, v33, 31 -; GFX9-NEXT: v_readlane_b32 s62, v33, 30 -; GFX9-NEXT: v_readlane_b32 s61, v33, 29 -; GFX9-NEXT: v_readlane_b32 s60, v33, 28 -; GFX9-NEXT: v_readlane_b32 s59, v33, 27 -; GFX9-NEXT: v_readlane_b32 s58, v33, 26 -; GFX9-NEXT: v_readlane_b32 s57, v33, 25 -; GFX9-NEXT: v_readlane_b32 s56, v33, 24 -; GFX9-NEXT: v_readlane_b32 s55, v33, 23 -; GFX9-NEXT: v_readlane_b32 s54, v33, 22 -; GFX9-NEXT: v_readlane_b32 s53, v33, 21 -; GFX9-NEXT: v_readlane_b32 s52, v33, 20 -; GFX9-NEXT: v_readlane_b32 s51, v33, 19 -; GFX9-NEXT: v_readlane_b32 s50, v33, 18 -; GFX9-NEXT: v_readlane_b32 s49, v33, 17 -; GFX9-NEXT: v_readlane_b32 s48, v33, 16 -; GFX9-NEXT: v_readlane_b32 s47, v33, 15 -; GFX9-NEXT: v_readlane_b32 s46, v33, 14 -; GFX9-NEXT: v_readlane_b32 s45, v33, 13 -; GFX9-NEXT: v_readlane_b32 s44, v33, 12 -; GFX9-NEXT: v_readlane_b32 s43, v33, 11 -; GFX9-NEXT: v_readlane_b32 s42, v33, 10 -; GFX9-NEXT: v_readlane_b32 s41, v33, 9 -; GFX9-NEXT: v_readlane_b32 s40, v33, 8 -; GFX9-NEXT: v_readlane_b32 s39, v33, 7 -; GFX9-NEXT: v_readlane_b32 s38, v33, 6 -; GFX9-NEXT: v_readlane_b32 s37, v33, 5 -; GFX9-NEXT: v_readlane_b32 s36, v33, 4 ; GFX9-NEXT: v_readlane_b32 s35, v33, 3 ; GFX9-NEXT: v_readlane_b32 s34, v33, 2 ; GFX9-NEXT: v_readlane_b32 s31, v33, 1 diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll index f9ffa5ae57f3e..85b9adfe6ea5c 100644 --- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll +++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll @@ -9,24 +9,24 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: s_addc_u32 s13, s13, 0 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 -; CHECK-NEXT: s_load_dwordx8 s[36:43], s[8:9], 0x0 +; CHECK-NEXT: s_load_dwordx8 s[96:103], s[8:9], 0x0 ; CHECK-NEXT: s_add_u32 s0, s0, s17 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 ; CHECK-NEXT: s_mov_b32 s12, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_cmp_lg_u32 s40, 0 +; CHECK-NEXT: s_cmp_lg_u32 s100, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB0_8 ; CHECK-NEXT: ; %bb.1: ; %if.end13.i.i -; CHECK-NEXT: s_cmp_eq_u32 s42, 0 +; CHECK-NEXT: s_cmp_eq_u32 s102, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB0_4 ; CHECK-NEXT: ; %bb.2: ; %if.else251.i.i -; CHECK-NEXT: s_cmp_lg_u32 s43, 0 +; CHECK-NEXT: s_cmp_lg_u32 s103, 0 ; CHECK-NEXT: s_mov_b32 s17, 0 ; CHECK-NEXT: s_cselect_b32 s12, -1, 0 ; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12 ; CHECK-NEXT: s_cbranch_vccz .LBB0_5 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: s_mov_b32 s36, 0 +; CHECK-NEXT: s_mov_b32 s96, 0 ; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12 ; CHECK-NEXT: s_cbranch_vccz .LBB0_6 ; CHECK-NEXT: s_branch .LBB0_7 @@ -34,16 +34,16 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: s_mov_b32 s14, s12 ; CHECK-NEXT: s_mov_b32 s15, s12 ; CHECK-NEXT: s_mov_b32 s13, s12 -; CHECK-NEXT: s_mov_b64 s[38:39], s[14:15] -; CHECK-NEXT: s_mov_b64 s[36:37], s[12:13] +; CHECK-NEXT: s_mov_b64 s[98:99], s[14:15] +; CHECK-NEXT: s_mov_b64 s[96:97], s[12:13] ; CHECK-NEXT: s_branch .LBB0_7 ; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i -; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s41, 0 -; CHECK-NEXT: s_mov_b32 s36, 1.0 +; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s101, 0 +; CHECK-NEXT: s_mov_b32 s96, 1.0 ; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000 -; CHECK-NEXT: s_mov_b32 s37, s36 -; CHECK-NEXT: s_mov_b32 s38, s36 -; CHECK-NEXT: s_mov_b32 s39, s36 +; CHECK-NEXT: s_mov_b32 s97, s96 +; CHECK-NEXT: s_mov_b32 s98, s96 +; CHECK-NEXT: s_mov_b32 s99, s96 ; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12 ; CHECK-NEXT: s_cbranch_vccnz .LBB0_7 ; CHECK-NEXT: .LBB0_6: ; %if.end273.i.i @@ -55,7 +55,7 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0 ; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1 -; CHECK-NEXT: v_add_f32_e64 v1, s17, s36 +; CHECK-NEXT: v_add_f32_e64 v1, s17, s96 ; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9] ; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13] ; CHECK-NEXT: s_mov_b32 s12, s14 @@ -65,13 +65,13 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: s_mov_b32 s13, s15 ; CHECK-NEXT: s_mov_b32 s14, s16 -; CHECK-NEXT: s_mov_b32 s36, 0 +; CHECK-NEXT: s_mov_b32 s96, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] ; CHECK-NEXT: s_mov_b64 s[8:9], s[34:35] -; CHECK-NEXT: s_mov_b32 s37, s36 -; CHECK-NEXT: s_mov_b32 s38, s36 -; CHECK-NEXT: s_mov_b32 s39, s36 +; CHECK-NEXT: s_mov_b32 s97, s96 +; CHECK-NEXT: s_mov_b32 s98, s96 +; CHECK-NEXT: s_mov_b32 s99, s96 ; CHECK-NEXT: .LBB0_7: ; %if.end294.i.i ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12 @@ -80,11 +80,11 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20 -; CHECK-NEXT: v_mov_b32_e32 v0, s36 +; CHECK-NEXT: v_mov_b32_e32 v0, s96 ; CHECK-NEXT: v_mov_b32_e32 v4, 0 -; CHECK-NEXT: v_mov_b32_e32 v1, s37 -; CHECK-NEXT: v_mov_b32_e32 v2, s38 -; CHECK-NEXT: v_mov_b32_e32 v3, s39 +; CHECK-NEXT: v_mov_b32_e32 v1, s97 +; CHECK-NEXT: v_mov_b32_e32 v2, s98 +; CHECK-NEXT: v_mov_b32_e32 v3, s99 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] ; CHECK-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index 98136347ab702..bce02a4cfacde 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -17,7 +17,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr8_sgpr9, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg6.kernarg.offset.align.down, align 8, addrspace 4) ; GFX90A-NEXT: renamable $sgpr17 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg6.kernarg.offset.align.down + 16, align 8, addrspace 4) ; GFX90A-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27 = S_LOAD_DWORDX4_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4) - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4) + ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4) ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr33, 0, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_CSELECT_B64 -1, 0, implicit killed $scc ; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1 @@ -33,7 +33,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.1.bb103: ; GFX90A-NEXT: successors: %bb.58(0x40000000), %bb.2(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr30_sgpr31, implicit-def dead $scc @@ -41,7 +41,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: successors: %bb.3(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr56, $sgpr57, $sgpr20_sgpr21_sgpr22, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr44, $sgpr45, $sgpr20_sgpr21_sgpr22, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr2, $vgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF @@ -54,7 +54,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.3.Flow17: ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.57(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr23, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr23, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr30 = V_AND_B32_e32 1023, $vgpr31, implicit $exec ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr34_sgpr35, implicit-def dead $scc @@ -62,7 +62,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.4.bb15: ; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr2_vgpr3, implicit $exec ; GFX90A-NEXT: renamable $vgpr4 = COPY renamable $sgpr25, implicit $exec @@ -79,16 +79,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1 - ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF @@ -111,7 +111,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.6.Flow20: ; GFX90A-NEXT: successors: %bb.7(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr19 = COPY renamable $sgpr17, implicit $exec ; GFX90A-NEXT: renamable $vgpr18 = COPY $sgpr17, implicit $exec @@ -124,15 +124,15 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.7.Flow19: ; GFX90A-NEXT: successors: %bb.62(0x40000000), %bb.8(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0 ; GFX90A-NEXT: $sgpr24_sgpr25 = S_AND_SAVEEXEC_B64 $sgpr36_sgpr37, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.62, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.8.Flow32: ; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr18_sgpr19, implicit-def $exec, implicit-def $scc, implicit $exec @@ -141,58 +141,58 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.9.bb89: ; GFX90A-NEXT: successors: %bb.10(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.10.Flow33: ; GFX90A-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc - ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr58_sgpr59, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.11.bb84: ; GFX90A-NEXT: successors: %bb.12(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.12.Flow34: ; GFX90A-NEXT: successors: %bb.13(0x40000000), %bb.14(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc - ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr54_sgpr55, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.13.bb79: ; GFX90A-NEXT: successors: %bb.14(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.14.Flow35: ; GFX90A-NEXT: successors: %bb.15(0x40000000), %bb.16(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc - ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr52_sgpr53, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr38_sgpr39, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.16, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.15.bb72: ; GFX90A-NEXT: successors: %bb.16(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr8 = S_ADD_U32 renamable $sgpr8, 48, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr9 = S_ADDC_U32 killed renamable $sgpr9, 0, implicit-def dead $scc, implicit killed $scc @@ -202,122 +202,122 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: $sgpr13 = COPY killed renamable $sgpr15 ; GFX90A-NEXT: $sgpr14 = COPY killed renamable $sgpr16 ; GFX90A-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr18_sgpr19, @f2, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0, implicit $vgpr1 - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.16.Flow36: ; GFX90A-NEXT: successors: %bb.17(0x40000000), %bb.18(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr36_sgpr37, implicit-def $scc - ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr50_sgpr51, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr66_sgpr67, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.18, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.17.bb67: ; GFX90A-NEXT: successors: %bb.18(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.18.Flow37: ; GFX90A-NEXT: successors: %bb.19(0x40000000), %bb.20(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc - ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr48_sgpr49, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr64_sgpr65, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.20, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.19.bb62: ; GFX90A-NEXT: successors: %bb.20(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.20.Flow38: ; GFX90A-NEXT: successors: %bb.21(0x40000000), %bb.22(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc - ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr62_sgpr63, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.22, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.21.bb54: ; GFX90A-NEXT: successors: %bb.22(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.22.Flow39: ; GFX90A-NEXT: successors: %bb.23(0x40000000), %bb.24(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc - ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr44_sgpr45, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr52_sgpr53, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.24, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.23.bb47: ; GFX90A-NEXT: successors: %bb.24(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.24.Flow40: ; GFX90A-NEXT: successors: %bb.25(0x40000000), %bb.26(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc - ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr50_sgpr51, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.26, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.25.bb40: ; GFX90A-NEXT: successors: %bb.26(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.26.Flow41: ; GFX90A-NEXT: successors: %bb.27(0x40000000), %bb.28(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc - ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr48_sgpr49, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.28, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.27.bb33: ; GFX90A-NEXT: successors: %bb.28(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.28.Flow42: ; GFX90A-NEXT: successors: %bb.34(0x40000000), %bb.29(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc - ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr38_sgpr39, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.34, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.29.Flow43: ; GFX90A-NEXT: successors: %bb.30(0x40000000), %bb.31(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc ; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr34_sgpr35, implicit-def dead $scc @@ -325,17 +325,17 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.30.bb19: ; GFX90A-NEXT: successors: %bb.31(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.31.Flow44: ; GFX90A-NEXT: successors: %bb.32(0x40000000), %bb.33(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr56_sgpr57, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr68_sgpr69, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr56_sgpr57, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr68_sgpr69, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.33, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.32.UnifiedUnreachableBlock: @@ -351,32 +351,32 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.34.bb26: ; GFX90A-NEXT: successors: %bb.29(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr56_sgpr57, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_OR_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc ; GFX90A-NEXT: S_BRANCH %bb.29 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.35.bb20: ; GFX90A-NEXT: successors: %bb.37(0x40000000), %bb.36(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec :: (load (s8) from %ir.i21, addrspace 1) ; GFX90A-NEXT: renamable $vgpr42 = V_ADD_CO_U32_e32 1024, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 -1 - ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1 + ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr43, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = V_CMP_LT_I16_e64 0, killed $vgpr0, implicit $exec - ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF @@ -400,19 +400,22 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.36.Flow21: ; GFX90A-NEXT: successors: %bb.6(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc ; GFX90A-NEXT: S_BRANCH %bb.6 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.37.bb27: ; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr58_sgpr59, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr50_sgpr51, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr44_sgpr45, $sgpr42_sgpr43 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr42_sgpr43, $sgpr64_sgpr65, $sgpr62_sgpr63, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49, $sgpr50_sgpr51 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec :: (load (s8) from %ir.i28, addrspace 1) ; GFX90A-NEXT: renamable $vgpr44 = V_ADD_CO_U32_e32 2048, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 -1 - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = COPY renamable $sgpr36_sgpr37 + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = COPY renamable $sgpr36_sgpr37 + ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr45, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF @@ -437,33 +440,34 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.38.Flow22: ; GFX90A-NEXT: successors: %bb.36(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr38_sgpr39, implicit-def $scc - ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_XOR_B64 $exec, -1, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_AND_B64 killed renamable $sgpr40_sgpr41, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr40_sgpr41, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr18_sgpr19, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_ANDN2_B64 killed renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_OR_B64 killed renamable $sgpr36_sgpr37, killed renamable $sgpr56_sgpr57, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_OR_B64 killed renamable $sgpr36_sgpr37, killed renamable $sgpr44_sgpr45, implicit-def dead $scc ; GFX90A-NEXT: S_BRANCH %bb.36 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.39.bb34: ; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.40(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr58_sgpr59, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr50_sgpr51, $sgpr48_sgpr49, $sgpr46_sgpr47, $sgpr44_sgpr45 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec :: (load (s8) from %ir.i35, addrspace 1) ; GFX90A-NEXT: renamable $vgpr56 = V_ADD_CO_U32_e32 3072, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 -1 - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = COPY renamable $sgpr36_sgpr37 + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = COPY renamable $sgpr36_sgpr37 + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr57, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF @@ -487,38 +491,37 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.40.Flow23: ; GFX90A-NEXT: successors: %bb.38(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_XOR_B64 $exec, -1, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr18_sgpr19, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_OR_B64 killed renamable $sgpr56_sgpr57, killed renamable $sgpr60_sgpr61, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc ; GFX90A-NEXT: S_BRANCH %bb.38 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.41.bb41: ; GFX90A-NEXT: successors: %bb.46(0x40000000), %bb.42(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr58_sgpr59, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr50_sgpr51, $sgpr48_sgpr49, $sgpr46_sgpr47 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr64_sgpr65, $sgpr60_sgpr61, $sgpr66_sgpr67 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc ; GFX90A-NEXT: renamable $vgpr59, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, $vgpr41, killed $sgpr18_sgpr19, 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr58_vgpr59, 0, 0, implicit $exec :: (load (s8) from %ir.i42, addrspace 1) ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 -1 - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = COPY renamable $sgpr36_sgpr37 + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1 + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37 ; GFX90A-NEXT: renamable $vgpr18, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec - ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF @@ -539,41 +542,41 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.42.Flow24: ; GFX90A-NEXT: successors: %bb.40(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc ; GFX90A-NEXT: renamable $vgpr59 = COPY killed renamable $vgpr18, implicit $exec ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, -1, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr18_sgpr19, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_OR_B64 killed renamable $sgpr56_sgpr57, killed renamable $sgpr60_sgpr61, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc ; GFX90A-NEXT: S_BRANCH %bb.40 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.43.bb55: ; GFX90A-NEXT: successors: %bb.48(0x40000000), %bb.44(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr44_sgpr45, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr54_sgpr55, $sgpr46_sgpr47 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57, $sgpr48_sgpr49 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: S_BITCMP1_B32 killed renamable $sgpr33, 16, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_CSELECT_B64 -1, 0, implicit killed $scc - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_XOR_B64 renamable $sgpr64_sgpr65, -1, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_XOR_B64 renamable $sgpr64_sgpr65, -1, implicit-def dead $scc ; GFX90A-NEXT: renamable $vgpr62 = V_ADD_CO_U32_e32 6144, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $vgpr63, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec - ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr48_sgpr49, implicit-def dead $scc + ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr50_sgpr51, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.48, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.44: ; GFX90A-NEXT: successors: %bb.45(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr56, $vgpr47, $vgpr18, $vgpr30, $vgpr31, $vgpr58, $vgpr61, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $vgpr57, $vgpr63, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr46, $vgpr45, $vgpr2, $vgpr3, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr40, $vgpr60, $vgpr62 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr56, $vgpr47, $vgpr18, $vgpr30, $vgpr31, $vgpr58, $vgpr61, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8, $sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $vgpr57, $vgpr63, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22, $sgpr22_sgpr23, $sgpr24_sgpr25_sgpr26, $sgpr26_sgpr27, $vgpr46, $vgpr45, $vgpr2, $vgpr3, $vgpr44, $vgpr43, $vgpr42, $vgpr41, $vgpr40, $vgpr60, $vgpr62 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = COPY renamable $sgpr36_sgpr37 + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37 ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF @@ -587,36 +590,35 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 ; GFX90A-NEXT: renamable $sgpr17 = IMPLICIT_DEF - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.45.Flow26: ; GFX90A-NEXT: successors: %bb.47(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_XOR_B64 $exec, -1, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr70_sgpr71 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_XOR_B64 $exec, -1, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr70_sgpr71 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr48_sgpr49, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc ; GFX90A-NEXT: S_BRANCH %bb.47 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.46.bb48: ; GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.47(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr58_sgpr59, $sgpr54_sgpr55, $sgpr44_sgpr45, $sgpr52_sgpr53 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr60_sgpr61, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr66_sgpr67, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc ; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $vgpr1, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE killed renamable $vgpr0_vgpr1, 1024, 0, implicit $exec :: (load (s8) from %ir.i49, addrspace 1) - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_MOV_B64 -1 - ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = COPY renamable $sgpr36_sgpr37 - ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 -1 + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37 ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr61, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $sgpr18_sgpr19, 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec @@ -640,51 +642,51 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.47.Flow25: ; GFX90A-NEXT: successors: %bb.42(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def $scc - ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_XOR_B64 $exec, -1, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr70_sgpr71, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_OR_B64 killed renamable $sgpr46_sgpr47, killed renamable $sgpr56_sgpr57, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr70_sgpr71, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr52_sgpr53, implicit-def dead $scc ; GFX90A-NEXT: S_BRANCH %bb.42 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.48.bb63: ; GFX90A-NEXT: successors: %bb.50(0x40000000), %bb.49(0x40000000) - ; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr54_sgpr55, $sgpr46_sgpr47 + ; GFX90A-NEXT: liveins: $vcc, $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr50_sgpr51, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57, $sgpr48_sgpr49 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0 ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.50, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.49: ; GFX90A-NEXT: successors: %bb.44(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr54_sgpr55 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 -1 ; GFX90A-NEXT: S_BRANCH %bb.44 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.50.bb68: ; GFX90A-NEXT: successors: %bb.54(0x40000000), %bb.51(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr54_sgpr55 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr46_sgpr47, $sgpr50_sgpr51, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = nuw nsw V_LSHLREV_B32_e32 3, $vgpr30, implicit $exec ; GFX90A-NEXT: renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr48_sgpr49, implicit-def dead $scc + ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr50_sgpr51, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.54, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.51: ; GFX90A-NEXT: successors: %bb.45(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53, $sgpr58_sgpr59, $sgpr54_sgpr55 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 -1 - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = COPY renamable $sgpr36_sgpr37 + ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 -1 + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37 ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF @@ -701,20 +703,20 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.52.bb80: ; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.53(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr17 = S_BFE_U32 renamable $sgpr20, 65560, implicit-def dead $scc ; GFX90A-NEXT: S_CMP_EQ_U32 killed renamable $sgpr17, 0, implicit-def $scc ; GFX90A-NEXT: renamable $vgpr6 = V_ADD_CO_U32_e32 4096, $vgpr0, implicit-def $vcc, implicit $exec - ; GFX90A-NEXT: renamable $vgpr7, dead renamable $sgpr50_sgpr51 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec + ; GFX90A-NEXT: renamable $vgpr7, dead renamable $sgpr52_sgpr53 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.59, implicit killed $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.53: ; GFX90A-NEXT: successors: %bb.61(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 -1 + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 -1 ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = COPY renamable $sgpr36_sgpr37 ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF @@ -730,13 +732,13 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.54.bb73: ; GFX90A-NEXT: successors: %bb.52(0x40000000), %bb.55(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr56_sgpr57:0x000000000000000F, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr6 = GLOBAL_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec :: (load (s8) from %ir.i74, addrspace 1) ; GFX90A-NEXT: renamable $vgpr4 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 -1 - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = COPY renamable $sgpr36_sgpr37 + ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 -1 + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37 ; GFX90A-NEXT: renamable $vgpr5, dead renamable $sgpr58_sgpr59 = V_ADDC_U32_e64 0, 0, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr6, implicit $exec ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0 @@ -756,14 +758,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.55.Flow29: ; GFX90A-NEXT: successors: %bb.45(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr60_sgpr61, implicit-def $scc ; GFX90A-NEXT: S_BRANCH %bb.45 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.56.bb90: ; GFX90A-NEXT: successors: %bb.60(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr52_sgpr53, $sgpr56_sgpr57:0x000000000000000F, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr53 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr64_sgpr65, implicit $exec ; GFX90A-NEXT: renamable $vgpr10 = V_MOV_B32_e32 0, implicit $exec @@ -772,12 +774,12 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr16_vgpr17 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from %ir.7, addrspace 3) ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr22, implicit $exec ; GFX90A-NEXT: renamable $vgpr12_vgpr13 = DS_READ_B64_gfx9 killed renamable $vgpr10, 0, 0, implicit $exec :: (load (s64) from %ir.8, addrspace 3) - ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr56, implicit $exec - ; GFX90A-NEXT: renamable $vgpr11 = V_ALIGNBIT_B32_e64 killed $sgpr57, killed $vgpr10, 1, implicit $exec + ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr44, implicit $exec + ; GFX90A-NEXT: renamable $vgpr11 = V_ALIGNBIT_B32_e64 killed $sgpr45, killed $vgpr10, 1, implicit $exec ; GFX90A-NEXT: renamable $vgpr52 = V_ALIGNBIT_B32_e64 $vgpr17, $vgpr16, 1, implicit $exec ; GFX90A-NEXT: renamable $vgpr17 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec ; GFX90A-NEXT: renamable $vgpr15 = V_ALIGNBIT_B32_e64 $vgpr15, $vgpr14, 1, implicit $exec - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_XOR_B64 $exec, -1, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_XOR_B64 $exec, -1, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_OR_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $vgpr14, implicit $exec ; GFX90A-NEXT: S_BRANCH %bb.60 @@ -788,16 +790,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr15 = COPY killed renamable $sgpr23, implicit $exec ; GFX90A-NEXT: renamable $vgpr17 = COPY killed renamable $sgpr17, implicit $exec - ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF @@ -821,7 +823,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.58.bb105: ; GFX90A-NEXT: successors: %bb.3(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr22_vgpr23 = DS_READ_B64_gfx9 killed renamable $vgpr0, 0, 0, implicit $exec :: (load (s64) from `ptr addrspace(3) null`, addrspace 3) @@ -840,13 +842,13 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.59.bb85: ; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.60(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr56_sgpr57:0x000000000000000F, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr8 = V_OR_B32_e32 1, $vgpr6, implicit $exec ; GFX90A-NEXT: renamable $vgpr9 = COPY renamable $vgpr7, implicit $exec ; GFX90A-NEXT: renamable $vgpr10 = FLAT_LOAD_UBYTE renamable $vgpr8_vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i86) ; GFX90A-NEXT: renamable $sgpr17 = S_MOV_B32 0 - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 -1 + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 -1 ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr10, implicit $exec ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = COPY renamable $sgpr36_sgpr37 ; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF @@ -857,31 +859,31 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12 ; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10 - ; GFX90A-NEXT: $sgpr52_sgpr53 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr54_sgpr55 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.56, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.60.Flow31: ; GFX90A-NEXT: successors: %bb.61(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr52_sgpr53, implicit-def $scc - ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0 + ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr54_sgpr55, implicit-def $scc + ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.61.Flow30: ; GFX90A-NEXT: successors: %bb.55(0x80000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_XOR_B64 $exec, -1, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_OR_B64 killed renamable $sgpr50_sgpr51, killed renamable $sgpr56_sgpr57, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_XOR_B64 $exec, -1, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr44_sgpr45, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr52_sgpr53, implicit-def dead $scc ; GFX90A-NEXT: S_BRANCH %bb.55 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.62.bb140: ; GFX90A-NEXT: successors: %bb.68(0x40000000), %bb.63(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 -1 ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc @@ -889,14 +891,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.63.Flow13: ; GFX90A-NEXT: successors: %bb.64(0x40000000), %bb.66(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr36_sgpr37, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.66, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.64.bb159: ; GFX90A-NEXT: successors: %bb.67(0x40000000), %bb.65(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr30, implicit $exec ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec @@ -905,21 +907,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.65.Flow10: ; GFX90A-NEXT: successors: %bb.66(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $sgpr12_sgpr13 = S_ANDN2_SAVEEXEC_B64 $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.66.Flow14: ; GFX90A-NEXT: successors: %bb.8(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = COPY $exec + ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = COPY $exec ; GFX90A-NEXT: S_BRANCH %bb.8 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.67.bb161: ; GFX90A-NEXT: successors: %bb.65(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr21, killed $vgpr23, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr25, implicit $exec @@ -938,7 +940,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.68.bb174: ; GFX90A-NEXT: successors: %bb.72(0x40000000), %bb.69(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr26 = V_OR_B32_e32 1, $vgpr24, implicit $exec ; GFX90A-NEXT: renamable $vgpr48 = V_OR_B32_e32 $vgpr26, $vgpr22, implicit $exec @@ -954,14 +956,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.69.Flow: ; GFX90A-NEXT: successors: %bb.70(0x40000000), %bb.71(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.71, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.70.bb186: ; GFX90A-NEXT: successors: %bb.71(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 3, killed $vgpr2_vgpr3, implicit $exec ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr27, implicit $exec @@ -990,14 +992,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.71.Flow9: ; GFX90A-NEXT: successors: %bb.63(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 0 ; GFX90A-NEXT: S_BRANCH %bb.63 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.72.bb196: ; GFX90A-NEXT: successors: %bb.69(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 $vgpr50, killed $vgpr16, implicit $exec ; GFX90A-NEXT: renamable $vgpr54 = V_OR_B32_e32 killed $vgpr10, killed $vgpr14, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll index 05c2e0077f4ae..7c0c433ac3c51 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -900,9 +900,8 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: s_waitcnt expcnt(1) +; CHECK-NEXT: s_waitcnt expcnt(0) ; CHECK-NEXT: v_writelane_b32 v0, s30, 0 ; CHECK-NEXT: v_writelane_b32 v0, s31, 1 ; CHECK-NEXT: v_writelane_b32 v0, s33, 2 @@ -910,73 +909,40 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: v_writelane_b32 v0, s35, 4 ; CHECK-NEXT: v_writelane_b32 v0, s36, 5 ; CHECK-NEXT: v_writelane_b32 v0, s37, 6 -; CHECK-NEXT: v_writelane_b32 v0, s38, 7 -; CHECK-NEXT: v_writelane_b32 v0, s39, 8 -; CHECK-NEXT: v_writelane_b32 v0, s40, 9 -; CHECK-NEXT: v_writelane_b32 v0, s41, 10 -; CHECK-NEXT: v_writelane_b32 v0, s42, 11 -; CHECK-NEXT: v_writelane_b32 v0, s43, 12 -; CHECK-NEXT: v_writelane_b32 v0, s44, 13 -; CHECK-NEXT: v_writelane_b32 v0, s45, 14 -; CHECK-NEXT: v_writelane_b32 v0, s46, 15 -; CHECK-NEXT: v_writelane_b32 v0, s47, 16 -; CHECK-NEXT: v_writelane_b32 v0, s48, 17 -; CHECK-NEXT: v_writelane_b32 v0, s49, 18 -; CHECK-NEXT: v_writelane_b32 v0, s50, 19 -; CHECK-NEXT: v_writelane_b32 v0, s51, 20 -; CHECK-NEXT: v_writelane_b32 v0, s52, 21 -; CHECK-NEXT: v_writelane_b32 v0, s53, 22 -; CHECK-NEXT: v_writelane_b32 v0, s54, 23 -; CHECK-NEXT: v_writelane_b32 v0, s55, 24 -; CHECK-NEXT: v_writelane_b32 v0, s56, 25 -; CHECK-NEXT: v_writelane_b32 v0, s57, 26 -; CHECK-NEXT: v_writelane_b32 v0, s58, 27 -; CHECK-NEXT: v_writelane_b32 v0, s59, 28 -; CHECK-NEXT: v_writelane_b32 v0, s60, 29 -; CHECK-NEXT: v_writelane_b32 v0, s61, 30 -; CHECK-NEXT: v_writelane_b32 v0, s62, 31 -; CHECK-NEXT: v_writelane_b32 v0, s63, 32 -; CHECK-NEXT: v_writelane_b32 v0, s64, 33 -; CHECK-NEXT: v_writelane_b32 v0, s65, 34 -; CHECK-NEXT: v_writelane_b32 v0, s66, 35 -; CHECK-NEXT: v_writelane_b32 v0, s67, 36 -; CHECK-NEXT: v_writelane_b32 v0, s68, 37 -; CHECK-NEXT: v_writelane_b32 v0, s69, 38 -; CHECK-NEXT: v_writelane_b32 v0, s70, 39 -; CHECK-NEXT: v_writelane_b32 v0, s71, 40 -; CHECK-NEXT: v_writelane_b32 v0, s72, 41 -; CHECK-NEXT: v_writelane_b32 v0, s73, 42 -; CHECK-NEXT: v_writelane_b32 v0, s74, 43 -; CHECK-NEXT: v_writelane_b32 v0, s75, 44 -; CHECK-NEXT: v_writelane_b32 v0, s76, 45 -; CHECK-NEXT: v_writelane_b32 v0, s77, 46 -; CHECK-NEXT: v_writelane_b32 v0, s78, 47 -; CHECK-NEXT: v_writelane_b32 v0, s79, 48 -; CHECK-NEXT: v_writelane_b32 v0, s80, 49 -; CHECK-NEXT: v_writelane_b32 v0, s81, 50 -; CHECK-NEXT: v_writelane_b32 v0, s82, 51 -; CHECK-NEXT: v_writelane_b32 v0, s83, 52 -; CHECK-NEXT: v_writelane_b32 v0, s84, 53 -; CHECK-NEXT: v_writelane_b32 v0, s85, 54 -; CHECK-NEXT: v_writelane_b32 v0, s86, 55 -; CHECK-NEXT: v_writelane_b32 v0, s87, 56 -; CHECK-NEXT: v_writelane_b32 v0, s88, 57 -; CHECK-NEXT: s_waitcnt expcnt(0) -; CHECK-NEXT: v_writelane_b32 v1, s95, 0 -; CHECK-NEXT: v_writelane_b32 v0, s89, 58 -; CHECK-NEXT: v_writelane_b32 v1, s96, 1 -; CHECK-NEXT: v_writelane_b32 v0, s90, 59 -; CHECK-NEXT: v_writelane_b32 v1, s97, 2 -; CHECK-NEXT: v_writelane_b32 v0, s91, 60 -; CHECK-NEXT: v_writelane_b32 v1, s98, 3 -; CHECK-NEXT: v_writelane_b32 v0, s92, 61 -; CHECK-NEXT: v_writelane_b32 v1, s99, 4 -; CHECK-NEXT: s_mov_b32 s31, s12 -; CHECK-NEXT: v_writelane_b32 v0, s93, 62 -; CHECK-NEXT: v_writelane_b32 v1, s100, 5 -; CHECK-NEXT: s_cmp_eq_u32 s31, 0 -; CHECK-NEXT: v_writelane_b32 v0, s94, 63 -; CHECK-NEXT: v_writelane_b32 v1, s101, 6 +; CHECK-NEXT: v_writelane_b32 v0, s46, 7 +; CHECK-NEXT: v_writelane_b32 v0, s47, 8 +; CHECK-NEXT: v_writelane_b32 v0, s48, 9 +; CHECK-NEXT: v_writelane_b32 v0, s49, 10 +; CHECK-NEXT: v_writelane_b32 v0, s50, 11 +; CHECK-NEXT: v_writelane_b32 v0, s51, 12 +; CHECK-NEXT: v_writelane_b32 v0, s52, 13 +; CHECK-NEXT: v_writelane_b32 v0, s53, 14 +; CHECK-NEXT: v_writelane_b32 v0, s62, 15 +; CHECK-NEXT: v_writelane_b32 v0, s63, 16 +; CHECK-NEXT: v_writelane_b32 v0, s64, 17 +; CHECK-NEXT: v_writelane_b32 v0, s65, 18 +; CHECK-NEXT: v_writelane_b32 v0, s66, 19 +; CHECK-NEXT: v_writelane_b32 v0, s67, 20 +; CHECK-NEXT: v_writelane_b32 v0, s68, 21 +; CHECK-NEXT: v_writelane_b32 v0, s69, 22 +; CHECK-NEXT: v_writelane_b32 v0, s78, 23 +; CHECK-NEXT: v_writelane_b32 v0, s79, 24 +; CHECK-NEXT: v_writelane_b32 v0, s80, 25 +; CHECK-NEXT: v_writelane_b32 v0, s81, 26 +; CHECK-NEXT: v_writelane_b32 v0, s82, 27 +; CHECK-NEXT: v_writelane_b32 v0, s83, 28 +; CHECK-NEXT: v_writelane_b32 v0, s84, 29 +; CHECK-NEXT: v_writelane_b32 v0, s85, 30 +; CHECK-NEXT: v_writelane_b32 v0, s94, 31 +; CHECK-NEXT: v_writelane_b32 v0, s95, 32 +; CHECK-NEXT: v_writelane_b32 v0, s96, 33 +; CHECK-NEXT: v_writelane_b32 v0, s97, 34 +; CHECK-NEXT: v_writelane_b32 v0, s98, 35 +; CHECK-NEXT: v_writelane_b32 v0, s99, 36 +; CHECK-NEXT: s_mov_b32 s38, s12 +; CHECK-NEXT: v_writelane_b32 v0, s100, 37 +; CHECK-NEXT: s_cmp_eq_u32 s38, 0 +; CHECK-NEXT: v_writelane_b32 v0, s101, 38 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: ;;#ASMEND @@ -1292,9 +1258,9 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: s_cbranch_scc0 .LBB1_1 ; CHECK-NEXT: ; %bb.3: ; %entry ; CHECK-NEXT: s_not_b64 exec, exec -; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 -; CHECK-NEXT: v_writelane_b32 v2, s0, 0 -; CHECK-NEXT: v_writelane_b32 v2, s1, 1 +; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 +; CHECK-NEXT: v_writelane_b32 v1, s0, 0 +; CHECK-NEXT: v_writelane_b32 v1, s1, 1 ; CHECK-NEXT: s_getpc_b64 s[0:1] ; CHECK-NEXT: .Lpost_getpc1: ; CHECK-NEXT: s_add_u32 s0, s0, (.LBB1_4-.Lpost_getpc1)&4294967295 @@ -1313,9 +1279,9 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: ;;#ASMEND ; CHECK-NEXT: s_branch .LBB1_2 ; CHECK-NEXT: .LBB1_4: ; %bb3 -; CHECK-NEXT: v_readlane_b32 s0, v2, 0 -; CHECK-NEXT: v_readlane_b32 s1, v2, 1 -; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 +; CHECK-NEXT: v_readlane_b32 s0, v1, 0 +; CHECK-NEXT: v_readlane_b32 s1, v1, 1 +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; CHECK-NEXT: s_not_b64 exec, exec ; CHECK-NEXT: .LBB1_2: ; %bb3 ; CHECK-NEXT: ;;#ASMSTART @@ -1630,70 +1596,38 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use vcc_hi ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s101, v1, 6 -; CHECK-NEXT: v_readlane_b32 s100, v1, 5 -; CHECK-NEXT: v_readlane_b32 s99, v1, 4 -; CHECK-NEXT: v_readlane_b32 s98, v1, 3 -; CHECK-NEXT: v_readlane_b32 s97, v1, 2 -; CHECK-NEXT: v_readlane_b32 s96, v1, 1 -; CHECK-NEXT: v_readlane_b32 s95, v1, 0 -; CHECK-NEXT: v_readlane_b32 s94, v0, 63 -; CHECK-NEXT: v_readlane_b32 s93, v0, 62 -; CHECK-NEXT: v_readlane_b32 s92, v0, 61 -; CHECK-NEXT: v_readlane_b32 s91, v0, 60 -; CHECK-NEXT: v_readlane_b32 s90, v0, 59 -; CHECK-NEXT: v_readlane_b32 s89, v0, 58 -; CHECK-NEXT: v_readlane_b32 s88, v0, 57 -; CHECK-NEXT: v_readlane_b32 s87, v0, 56 -; CHECK-NEXT: v_readlane_b32 s86, v0, 55 -; CHECK-NEXT: v_readlane_b32 s85, v0, 54 -; CHECK-NEXT: v_readlane_b32 s84, v0, 53 -; CHECK-NEXT: v_readlane_b32 s83, v0, 52 -; CHECK-NEXT: v_readlane_b32 s82, v0, 51 -; CHECK-NEXT: v_readlane_b32 s81, v0, 50 -; CHECK-NEXT: v_readlane_b32 s80, v0, 49 -; CHECK-NEXT: v_readlane_b32 s79, v0, 48 -; CHECK-NEXT: v_readlane_b32 s78, v0, 47 -; CHECK-NEXT: v_readlane_b32 s77, v0, 46 -; CHECK-NEXT: v_readlane_b32 s76, v0, 45 -; CHECK-NEXT: v_readlane_b32 s75, v0, 44 -; CHECK-NEXT: v_readlane_b32 s74, v0, 43 -; CHECK-NEXT: v_readlane_b32 s73, v0, 42 -; CHECK-NEXT: v_readlane_b32 s72, v0, 41 -; CHECK-NEXT: v_readlane_b32 s71, v0, 40 -; CHECK-NEXT: v_readlane_b32 s70, v0, 39 -; CHECK-NEXT: v_readlane_b32 s69, v0, 38 -; CHECK-NEXT: v_readlane_b32 s68, v0, 37 -; CHECK-NEXT: v_readlane_b32 s67, v0, 36 -; CHECK-NEXT: v_readlane_b32 s66, v0, 35 -; CHECK-NEXT: v_readlane_b32 s65, v0, 34 -; CHECK-NEXT: v_readlane_b32 s64, v0, 33 -; CHECK-NEXT: v_readlane_b32 s63, v0, 32 -; CHECK-NEXT: v_readlane_b32 s62, v0, 31 -; CHECK-NEXT: v_readlane_b32 s61, v0, 30 -; CHECK-NEXT: v_readlane_b32 s60, v0, 29 -; CHECK-NEXT: v_readlane_b32 s59, v0, 28 -; CHECK-NEXT: v_readlane_b32 s58, v0, 27 -; CHECK-NEXT: v_readlane_b32 s57, v0, 26 -; CHECK-NEXT: v_readlane_b32 s56, v0, 25 -; CHECK-NEXT: v_readlane_b32 s55, v0, 24 -; CHECK-NEXT: v_readlane_b32 s54, v0, 23 -; CHECK-NEXT: v_readlane_b32 s53, v0, 22 -; CHECK-NEXT: v_readlane_b32 s52, v0, 21 -; CHECK-NEXT: v_readlane_b32 s51, v0, 20 -; CHECK-NEXT: v_readlane_b32 s50, v0, 19 -; CHECK-NEXT: v_readlane_b32 s49, v0, 18 -; CHECK-NEXT: v_readlane_b32 s48, v0, 17 -; CHECK-NEXT: v_readlane_b32 s47, v0, 16 -; CHECK-NEXT: v_readlane_b32 s46, v0, 15 -; CHECK-NEXT: v_readlane_b32 s45, v0, 14 -; CHECK-NEXT: v_readlane_b32 s44, v0, 13 -; CHECK-NEXT: v_readlane_b32 s43, v0, 12 -; CHECK-NEXT: v_readlane_b32 s42, v0, 11 -; CHECK-NEXT: v_readlane_b32 s41, v0, 10 -; CHECK-NEXT: v_readlane_b32 s40, v0, 9 -; CHECK-NEXT: v_readlane_b32 s39, v0, 8 -; CHECK-NEXT: v_readlane_b32 s38, v0, 7 +; CHECK-NEXT: v_readlane_b32 s101, v0, 38 +; CHECK-NEXT: v_readlane_b32 s100, v0, 37 +; CHECK-NEXT: v_readlane_b32 s99, v0, 36 +; CHECK-NEXT: v_readlane_b32 s98, v0, 35 +; CHECK-NEXT: v_readlane_b32 s97, v0, 34 +; CHECK-NEXT: v_readlane_b32 s96, v0, 33 +; CHECK-NEXT: v_readlane_b32 s95, v0, 32 +; CHECK-NEXT: v_readlane_b32 s94, v0, 31 +; CHECK-NEXT: v_readlane_b32 s85, v0, 30 +; CHECK-NEXT: v_readlane_b32 s84, v0, 29 +; CHECK-NEXT: v_readlane_b32 s83, v0, 28 +; CHECK-NEXT: v_readlane_b32 s82, v0, 27 +; CHECK-NEXT: v_readlane_b32 s81, v0, 26 +; CHECK-NEXT: v_readlane_b32 s80, v0, 25 +; CHECK-NEXT: v_readlane_b32 s79, v0, 24 +; CHECK-NEXT: v_readlane_b32 s78, v0, 23 +; CHECK-NEXT: v_readlane_b32 s69, v0, 22 +; CHECK-NEXT: v_readlane_b32 s68, v0, 21 +; CHECK-NEXT: v_readlane_b32 s67, v0, 20 +; CHECK-NEXT: v_readlane_b32 s66, v0, 19 +; CHECK-NEXT: v_readlane_b32 s65, v0, 18 +; CHECK-NEXT: v_readlane_b32 s64, v0, 17 +; CHECK-NEXT: v_readlane_b32 s63, v0, 16 +; CHECK-NEXT: v_readlane_b32 s62, v0, 15 +; CHECK-NEXT: v_readlane_b32 s53, v0, 14 +; CHECK-NEXT: v_readlane_b32 s52, v0, 13 +; CHECK-NEXT: v_readlane_b32 s51, v0, 12 +; CHECK-NEXT: v_readlane_b32 s50, v0, 11 +; CHECK-NEXT: v_readlane_b32 s49, v0, 10 +; CHECK-NEXT: v_readlane_b32 s48, v0, 9 +; CHECK-NEXT: v_readlane_b32 s47, v0, 8 +; CHECK-NEXT: v_readlane_b32 s46, v0, 7 ; CHECK-NEXT: v_readlane_b32 s37, v0, 6 ; CHECK-NEXT: v_readlane_b32 s36, v0, 5 ; CHECK-NEXT: v_readlane_b32 s35, v0, 4 @@ -1703,7 +1637,6 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: v_readlane_b32 s30, v0, 0 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll index d4c50cf2c7e4a..34f4476f7fd6a 100644 --- a/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll +++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll @@ -1,6 +1,6 @@ -; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -enable-var-scope %s +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs=0 -filetype=null %s 2>&1 | FileCheck -enable-var-scope %s -; CHECK: LLVM ERROR: failed to find free scratch register +; CHECK: illegal VGPR to SGPR copy declare hidden void @external_void_func_a15i32_inreg([15 x i32] inreg) #0 declare hidden void @external_void_func_a16i32_inreg([16 x i32] inreg) #0 diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll index 0b8ad359ccb94..394c32c8e4bcf 100644 --- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll @@ -1385,15 +1385,15 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) # ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s29, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 vcc, -1 +; GFX9-NEXT: s_or_saveexec_b64 s[38:39], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, vcc +; GFX9-NEXT: s_mov_b64 exec, s[38:39] ; GFX9-NEXT: v_writelane_b32 v40, s29, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 vcc -; GFX9-NEXT: s_add_u32 vcc_lo, vcc_lo, external_void_func_a15i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 vcc_hi, vcc_hi, external_void_func_a15i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[38:39] +; GFX9-NEXT: s_add_u32 s38, s38, external_void_func_a15i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s39, s39, external_void_func_a15i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s3, s19 ; GFX9-NEXT: s_mov_b32 s2, s18 ; GFX9-NEXT: s_mov_b32 s1, s17 @@ -1408,7 +1408,7 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) # ; GFX9-NEXT: s_mov_b32 s23, s27 ; GFX9-NEXT: s_mov_b32 s24, s28 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], vcc +; GFX9-NEXT: s_swappc_b64 s[30:31], s[38:39] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll index 9bbecacd6c774..48f32a87203a3 100644 --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -63,18 +63,18 @@ declare hidden void @external_void_func_v16i8(<16 x i8>) #0 define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; VI-LABEL: test_call_external_void_func_i1_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -82,18 +82,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_i1_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -101,18 +101,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_i1_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -155,18 +155,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s5 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: v_bfe_i32 v0, v0, 0, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -178,18 +178,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s5 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: v_bfe_i32 v0, v0, 0, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -201,18 +201,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -265,18 +265,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s5 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: v_and_b32_e32 v0, 1, v0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -288,18 +288,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s5 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: v_and_b32_e32 v0, 1, v0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -311,18 +311,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -370,18 +370,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; VI-LABEL: test_call_external_void_func_i8_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s5 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 0x7b ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -389,18 +389,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; ; CI-LABEL: test_call_external_void_func_i8_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s5 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 0x7b ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -408,18 +408,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; ; GFX9-LABEL: test_call_external_void_func_i8_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -463,18 +463,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s5 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm @@ -485,18 +485,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s5 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm @@ -507,18 +507,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -567,18 +567,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s5 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm @@ -589,18 +589,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s5 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm @@ -611,18 +611,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -667,18 +667,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; VI-LABEL: test_call_external_void_func_i16_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 0x7b ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -686,18 +686,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_i16_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 0x7b ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -705,18 +705,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_i16_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -759,18 +759,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s5 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm @@ -781,18 +781,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s5 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm @@ -803,18 +803,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -863,18 +863,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s5 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm @@ -885,18 +885,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s5 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm @@ -907,18 +907,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -963,18 +963,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { ; VI-LABEL: test_call_external_void_func_i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s5 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 42 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -982,18 +982,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { ; ; CI-LABEL: test_call_external_void_func_i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s5 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 42 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1001,18 +1001,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { ; ; GFX9-LABEL: test_call_external_void_func_i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1051,18 +1051,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; VI-LABEL: test_call_external_void_func_i64_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 0x7b ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: s_mov_b32 s32, 0 @@ -1071,18 +1071,18 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_i64_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 0x7b ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: s_mov_b32 s32, 0 @@ -1091,18 +1091,18 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_i64_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -1143,75 +1143,79 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; VI-LABEL: test_call_external_void_func_v2i64: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], 0 +; VI-NEXT: s_mov_b32 s0, 0 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s1, s0 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s37, s37, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2i64: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], 0 +; CI-NEXT: s_mov_b32 s0, 0 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: s_mov_b32 s1, s0 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s37, s37, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_v2i64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], 0 +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_void_func_v2i64: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_mov_b64 s[4:5], 0 +; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 +; GFX11-NEXT: s_mov_b32 s5, s4 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i64@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i64@rel32@hi+12 @@ -1225,10 +1229,11 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 +; HSA-NEXT: s_mov_b32 s8, 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 -; HSA-NEXT: s_mov_b64 s[8:9], 0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 +; HSA-NEXT: s_mov_b32 s9, s8 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 @@ -1247,18 +1252,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; VI-LABEL: test_call_external_void_func_v2i64_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: v_mov_b32_e32 v2, 3 @@ -1269,18 +1274,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v2i64_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 @@ -1291,18 +1296,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v2i64_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -1348,22 +1353,23 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; VI-LABEL: test_call_external_void_func_v3i64: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], 0 +; VI-NEXT: s_mov_b32 s0, 0 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s1, s0 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s37, s37, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v4, 1 ; VI-NEXT: v_mov_b32_e32 v5, 2 ; VI-NEXT: s_mov_b32 s32, 0 @@ -1372,22 +1378,23 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; ; CI-LABEL: test_call_external_void_func_v3i64: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], 0 +; CI-NEXT: s_mov_b32 s0, 0 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: s_mov_b32 s1, s0 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s37, s37, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v4, 1 ; CI-NEXT: v_mov_b32_e32 v5, 2 ; CI-NEXT: s_mov_b32 s32, 0 @@ -1396,22 +1403,23 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3i64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], 0 +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -1420,9 +1428,10 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; ; GFX11-LABEL: test_call_external_void_func_v3i64: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_mov_b64 s[4:5], 0 +; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 +; GFX11-NEXT: s_mov_b32 s5, s4 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] @@ -1437,10 +1446,11 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 +; HSA-NEXT: s_mov_b32 s8, 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 -; HSA-NEXT: s_mov_b64 s[8:9], 0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 +; HSA-NEXT: s_mov_b32 s9, s8 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 @@ -1463,22 +1473,23 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; VI-LABEL: test_call_external_void_func_v4i64: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], 0 +; VI-NEXT: s_mov_b32 s0, 0 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: s_mov_b32 s1, s0 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s37, s37, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v4, 1 ; VI-NEXT: v_mov_b32_e32 v5, 2 ; VI-NEXT: v_mov_b32_e32 v6, 3 @@ -1489,22 +1500,23 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; ; CI-LABEL: test_call_external_void_func_v4i64: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], 0 +; CI-NEXT: s_mov_b32 s0, 0 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 +; CI-NEXT: s_mov_b32 s1, s0 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s37, s37, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v4, 1 ; CI-NEXT: v_mov_b32_e32 v5, 2 ; CI-NEXT: v_mov_b32_e32 v6, 3 @@ -1515,22 +1527,23 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v4i64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], 0 +; GFX9-NEXT: s_mov_b32 s0, 0 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 +; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 ; GFX9-NEXT: v_mov_b32_e32 v6, 3 @@ -1541,9 +1554,10 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; ; GFX11-LABEL: test_call_external_void_func_v4i64: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_mov_b64 s[4:5], 0 +; GFX11-NEXT: s_mov_b32 s4, 0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 +; GFX11-NEXT: s_mov_b32 s5, s4 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 ; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4 @@ -1559,10 +1573,11 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 +; HSA-NEXT: s_mov_b32 s8, 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 -; HSA-NEXT: s_mov_b64 s[8:9], 0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 +; HSA-NEXT: s_mov_b32 s9, s8 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 @@ -1586,18 +1601,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; VI-LABEL: test_call_external_void_func_f16_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 0x4400 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1605,18 +1620,18 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_f16_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 4.0 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1624,18 +1639,18 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_f16_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1674,18 +1689,18 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; VI-LABEL: test_call_external_void_func_f32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 4.0 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1693,18 +1708,18 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_f32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 4.0 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1712,18 +1727,18 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_f32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1762,18 +1777,18 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v2f32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 1.0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: s_mov_b32 s32, 0 @@ -1782,18 +1797,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v2f32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 1.0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: s_mov_b32 s32, 0 @@ -1802,18 +1817,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v2f32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -1854,18 +1869,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v3f32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 1.0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: v_mov_b32_e32 v2, 4.0 @@ -1875,18 +1890,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v3f32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 1.0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 4.0 @@ -1896,18 +1911,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3f32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 @@ -1951,18 +1966,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v5f32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 1.0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: v_mov_b32_e32 v2, 4.0 @@ -1974,18 +1989,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v5f32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 1.0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 4.0 @@ -1997,18 +2012,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v5f32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 @@ -2057,18 +2072,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; VI-LABEL: test_call_external_void_func_f64_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 0x40100000 ; VI-NEXT: s_mov_b32 s32, 0 @@ -2077,18 +2092,18 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_f64_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 0x40100000 ; CI-NEXT: s_mov_b32 s32, 0 @@ -2097,18 +2112,18 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_f64_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -2149,18 +2164,18 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; VI-LABEL: test_call_external_void_func_v2f64_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: v_mov_b32_e32 v2, 0 @@ -2171,18 +2186,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v2f64_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 0 @@ -2193,18 +2208,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v2f64_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -2250,18 +2265,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; VI-LABEL: test_call_external_void_func_v3f64_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: v_mov_b32_e32 v2, 0 @@ -2274,18 +2289,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v3f64_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 0 @@ -2298,18 +2313,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3f64_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -2360,42 +2375,42 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; VI-LABEL: test_call_external_void_func_v2i16: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2i16: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -2404,21 +2419,21 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v2i16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -2461,42 +2476,42 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; VI-LABEL: test_call_external_void_func_v3i16: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v3i16: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_alignbit_b32 v1, v3, v2, 16 @@ -2507,21 +2522,21 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3i16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -2564,42 +2579,42 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; VI-LABEL: test_call_external_void_func_v3f16: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v3f16: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_cvt_f32_f16_e32 v0, v1 @@ -2611,21 +2626,21 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3f16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -2668,18 +2683,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { ; VI-LABEL: test_call_external_void_func_v3i16_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 0x20001 ; VI-NEXT: v_mov_b32_e32 v1, 3 ; VI-NEXT: s_mov_b32 s32, 0 @@ -2688,18 +2703,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v3i16_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 @@ -2709,18 +2724,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3i16_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 3 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -2761,18 +2776,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { ; VI-LABEL: test_call_external_void_func_v3f16_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; VI-NEXT: v_mov_b32_e32 v1, 0x4400 ; VI-NEXT: s_mov_b32 s32, 0 @@ -2781,18 +2796,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v3f16_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 1.0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 4.0 @@ -2802,18 +2817,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3f16_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -2855,42 +2870,42 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; VI-LABEL: test_call_external_void_func_v4i16: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v4i16: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0 @@ -2902,21 +2917,21 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v4i16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -2959,18 +2974,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; VI-LABEL: test_call_external_void_func_v4i16_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 0x20001 ; VI-NEXT: v_mov_b32_e32 v1, 0x40003 ; VI-NEXT: s_mov_b32 s32, 0 @@ -2979,18 +2994,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v4i16_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 @@ -3001,18 +3016,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v4i16_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -3054,42 +3069,42 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; VI-LABEL: test_call_external_void_func_v2f16: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2f16: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_cvt_f32_f16_e32 v0, v1 @@ -3100,21 +3115,21 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v2f16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -3157,63 +3172,63 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; VI-LABEL: test_call_external_void_func_v2i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_v2i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -3256,18 +3271,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v2i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: s_mov_b32 s32, 0 @@ -3276,18 +3291,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v2i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: s_mov_b32 s32, 0 @@ -3296,18 +3311,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v2i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -3348,18 +3363,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; VI-LABEL: test_call_external_void_func_v3i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s5 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 3 ; VI-NEXT: v_mov_b32_e32 v1, 4 ; VI-NEXT: v_mov_b32_e32 v2, 5 @@ -3369,18 +3384,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; ; CI-LABEL: test_call_external_void_func_v3i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s5 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 3 ; CI-NEXT: v_mov_b32_e32 v1, 4 ; CI-NEXT: v_mov_b32_e32 v2, 5 @@ -3390,18 +3405,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 @@ -3445,18 +3460,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; VI-LABEL: test_call_external_void_func_v3i32_i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s5 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 3 ; VI-NEXT: v_mov_b32_e32 v1, 4 ; VI-NEXT: v_mov_b32_e32 v2, 5 @@ -3467,18 +3482,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; ; CI-LABEL: test_call_external_void_func_v3i32_i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s5 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 3 ; CI-NEXT: v_mov_b32_e32 v1, 4 ; CI-NEXT: v_mov_b32_e32 v2, 5 @@ -3489,18 +3504,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3i32_i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 @@ -3546,63 +3561,63 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; VI-LABEL: test_call_external_void_func_v4i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v4i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_v4i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -3645,18 +3660,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v4i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: v_mov_b32_e32 v2, 3 @@ -3667,18 +3682,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v4i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 @@ -3689,18 +3704,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v4i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -3746,18 +3761,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v5i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: v_mov_b32_e32 v2, 3 @@ -3769,18 +3784,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v5i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 @@ -3792,18 +3807,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v5i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -3852,72 +3867,72 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; VI-LABEL: test_call_external_void_func_v8i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 -; VI-NEXT: s_addc_u32 s37, s37, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v8i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 -; CI-NEXT: s_addc_u32 s37, s37, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_v8i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -3968,18 +3983,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v8i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: v_mov_b32_e32 v2, 3 @@ -3994,18 +4009,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v8i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 @@ -4020,18 +4035,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v8i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -4087,13 +4102,13 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; VI-LABEL: test_call_external_void_func_v16i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) @@ -4101,25 +4116,25 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 ; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 -; VI-NEXT: s_addc_u32 s37, s37, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v16i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -4127,25 +4142,25 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 ; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 -; CI-NEXT: s_addc_u32 s37, s37, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_v16i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -4153,12 +4168,12 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 ; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -4216,8 +4231,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 @@ -4227,19 +4242,19 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 ; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_getpc_b64 s[8:9] ; VI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_waitcnt vmcnt(7) -; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32 +; VI-NEXT: buffer_store_dword v31, off, s[48:51], s32 ; VI-NEXT: s_swappc_b64 s[30:31], s[8:9] ; VI-NEXT: s_endpgm ; @@ -4248,8 +4263,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 @@ -4259,19 +4274,19 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 ; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_getpc_b64 s[8:9] ; CI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_waitcnt vmcnt(7) -; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32 +; CI-NEXT: buffer_store_dword v31, off, s[48:51], s32 ; CI-NEXT: s_swappc_b64 s[30:31], s[8:9] ; CI-NEXT: s_endpgm ; @@ -4280,8 +4295,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 @@ -4291,19 +4306,19 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 ; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_getpc_b64 s[8:9] ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_waitcnt vmcnt(7) -; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32 +; GFX9-NEXT: buffer_store_dword v31, off, s[48:51], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX9-NEXT: s_endpgm ; @@ -4369,15 +4384,15 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; VI-LABEL: test_call_external_void_func_v32i32_i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s5 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_dword v32, off, s[4:7], 0 ; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 @@ -4389,30 +4404,30 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_waitcnt vmcnt(8) -; VI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 +; VI-NEXT: buffer_store_dword v32, off, s[48:51], s32 offset:4 ; VI-NEXT: s_waitcnt vmcnt(8) -; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32 +; VI-NEXT: buffer_store_dword v31, off, s[48:51], s32 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v32i32_i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s5 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dword v32, off, s[4:7], 0 ; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 @@ -4424,30 +4439,30 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_waitcnt vmcnt(8) -; CI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 +; CI-NEXT: buffer_store_dword v32, off, s[48:51], s32 offset:4 ; CI-NEXT: s_waitcnt vmcnt(8) -; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32 +; CI-NEXT: buffer_store_dword v31, off, s[48:51], s32 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_v32i32_i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_load_dword v32, off, s[4:7], 0 ; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 @@ -4459,16 +4474,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_waitcnt vmcnt(8) -; GFX9-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 +; GFX9-NEXT: buffer_store_dword v32, off, s[48:51], s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(8) -; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32 +; GFX9-NEXT: buffer_store_dword v31, off, s[48:51], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm ; @@ -4542,89 +4557,89 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 { ; VI-LABEL: test_call_external_i32_func_i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s42, -1 -; VI-NEXT: s_mov_b32 s43, 0xe80000 -; VI-NEXT: s_add_u32 s40, s40, s5 -; VI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24 -; VI-NEXT: s_addc_u32 s41, s41, 0 +; VI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s66, -1 +; VI-NEXT: s_mov_b32 s67, 0xe80000 +; VI-NEXT: s_add_u32 s64, s64, s5 +; VI-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0x24 +; VI-NEXT: s_addc_u32 s65, s65, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[40:41] +; VI-NEXT: s_mov_b64 s[0:1], s[64:65] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[42:43] +; VI-NEXT: s_mov_b64 s[2:3], s[66:67] ; VI-NEXT: v_mov_b32_e32 v0, 42 ; VI-NEXT: s_mov_b32 s32, 0 -; VI-NEXT: s_mov_b32 s39, 0xf000 -; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s51, 0xf000 +; VI-NEXT: s_mov_b32 s50, -1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 +; VI-NEXT: buffer_store_dword v0, off, s[48:51], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_i32_func_i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s42, -1 -; CI-NEXT: s_mov_b32 s43, 0xe8f000 -; CI-NEXT: s_add_u32 s40, s40, s5 -; CI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x9 -; CI-NEXT: s_addc_u32 s41, s41, 0 +; CI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s66, -1 +; CI-NEXT: s_mov_b32 s67, 0xe8f000 +; CI-NEXT: s_add_u32 s64, s64, s5 +; CI-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0x9 +; CI-NEXT: s_addc_u32 s65, s65, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[40:41] +; CI-NEXT: s_mov_b64 s[0:1], s[64:65] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[42:43] +; CI-NEXT: s_mov_b64 s[2:3], s[66:67] ; CI-NEXT: v_mov_b32_e32 v0, 42 ; CI-NEXT: s_mov_b32 s32, 0 -; CI-NEXT: s_mov_b32 s39, 0xf000 -; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s51, 0xf000 +; CI-NEXT: s_mov_b32 s50, -1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 +; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_i32_func_i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s42, -1 -; GFX9-NEXT: s_mov_b32 s43, 0xe00000 -; GFX9-NEXT: s_add_u32 s40, s40, s5 -; GFX9-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24 -; GFX9-NEXT: s_addc_u32 s41, s41, 0 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s5 +; GFX9-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0x24 +; GFX9-NEXT: s_addc_u32 s65, s65, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_mov_b32 s39, 0xf000 -; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xf000 +; GFX9-NEXT: s_mov_b32 s50, -1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 +; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_i32_func_i32_imm: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b64 s[36:37], s[2:3], 0x24 +; GFX11-NEXT: s_load_b64 s[48:49], s[2:3], 0x24 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_i32_func_i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_i32_func_i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 -; GFX11-NEXT: s_mov_b32 s39, 0x31016000 -; GFX11-NEXT: s_mov_b32 s38, -1 +; GFX11-NEXT: s_mov_b32 s51, 0x31016000 +; GFX11-NEXT: s_mov_b32 s50, -1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: buffer_store_b32 v0, off, s[36:39], 0 dlc +; GFX11-NEXT: buffer_store_b32 v0, off, s[48:51], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_endpgm ; @@ -4632,7 +4647,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 -; HSA-NEXT: s_load_dwordx2 s[36:37], s[6:7], 0x0 +; HSA-NEXT: s_load_dwordx2 s[48:49], s[6:7], 0x0 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 @@ -4642,10 +4657,10 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 42 ; HSA-NEXT: s_mov_b32 s32, 0 -; HSA-NEXT: s_mov_b32 s39, 0x1100f000 -; HSA-NEXT: s_mov_b32 s38, -1 +; HSA-NEXT: s_mov_b32 s51, 0x1100f000 +; HSA-NEXT: s_mov_b32 s50, -1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] -; HSA-NEXT: buffer_store_dword v0, off, s[36:39], 0 +; HSA-NEXT: buffer_store_dword v0, off, s[48:51], 0 ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: s_endpgm %val = call i32 @external_i32_func_i32(i32 42) @@ -4656,72 +4671,72 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; VI-LABEL: test_call_external_void_func_struct_i8_i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; VI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 -; VI-NEXT: s_addc_u32 s37, s37, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_struct_i8_i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 -; CI-NEXT: s_addc_u32 s37, s37, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_struct_i8_i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -4772,86 +4787,86 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; VI-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: v_mov_b32_e32 v0, 3 -; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0 +; VI-NEXT: buffer_store_byte v0, off, s[48:51], 0 ; VI-NEXT: v_mov_b32_e32 v0, 8 -; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 -; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 -; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; VI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4 +; VI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4 +; VI-NEXT: buffer_load_dword v1, off, s[48:51], 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_movk_i32 s32, 0x400 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 +; VI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4 ; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32 +; VI-NEXT: buffer_store_dword v1, off, s[48:51], s32 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: v_mov_b32_e32 v0, 3 -; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0 +; CI-NEXT: buffer_store_byte v0, off, s[48:51], 0 ; CI-NEXT: v_mov_b32_e32 v0, 8 -; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 -; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 -; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4 +; CI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4 +; CI-NEXT: buffer_load_dword v1, off, s[48:51], 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_movk_i32 s32, 0x400 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_waitcnt vmcnt(1) -; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 +; CI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4 ; CI-NEXT: s_waitcnt vmcnt(1) -; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32 +; CI-NEXT: buffer_store_dword v1, off, s[48:51], s32 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 3 -; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0 +; GFX9-NEXT: buffer_store_byte v0, off, s[48:51], 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 -; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 -; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_movk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 +; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32 +; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm ; @@ -4908,33 +4923,33 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 { ; VI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s5 -; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: v_mov_b32_e32 v0, 3 -; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0 +; VI-NEXT: buffer_store_byte v0, off, s[48:51], 0 ; VI-NEXT: v_mov_b32_e32 v0, 8 -; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 -; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 -; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; VI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4 +; VI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4 +; VI-NEXT: buffer_load_dword v1, off, s[48:51], 0 ; VI-NEXT: s_movk_i32 s32, 0x800 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 +; VI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4 ; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32 +; VI-NEXT: buffer_store_dword v1, off, s[48:51], s32 ; VI-NEXT: v_mov_b32_e32 v0, 8 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 -; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 +; VI-NEXT: buffer_load_ubyte v0, off, s[48:51], 0 offset:8 +; VI-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:12 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(1) @@ -4946,33 +4961,33 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; ; CI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s5 -; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: v_mov_b32_e32 v0, 3 -; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0 +; CI-NEXT: buffer_store_byte v0, off, s[48:51], 0 ; CI-NEXT: v_mov_b32_e32 v0, 8 -; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 -; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 -; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4 +; CI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4 +; CI-NEXT: buffer_load_dword v1, off, s[48:51], 0 ; CI-NEXT: s_movk_i32 s32, 0x800 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_waitcnt vmcnt(1) -; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 +; CI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4 ; CI-NEXT: s_waitcnt vmcnt(1) -; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32 +; CI-NEXT: buffer_store_dword v1, off, s[48:51], s32 ; CI-NEXT: v_mov_b32_e32 v0, 8 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 -; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 +; CI-NEXT: buffer_load_ubyte v0, off, s[48:51], 0 offset:8 +; CI-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:12 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt vmcnt(1) @@ -4984,34 +4999,34 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; ; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s5 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 3 -; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0 +; GFX9-NEXT: buffer_store_byte v0, off, s[48:51], 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 -; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 -; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 +; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32 +; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 -; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 +; GFX9-NEXT: buffer_load_ubyte v0, off, s[48:51], 0 offset:8 +; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:12 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt vmcnt(1) @@ -5106,23 +5121,23 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; VI-LABEL: test_call_external_void_func_v16i8: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s38, -1 -; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s37, s37, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[36:37] +; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[38:39] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_lshrrev_b32_e32 v16, 8, v0 @@ -5148,23 +5163,23 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; ; CI-LABEL: test_call_external_void_func_v16i8: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s38, -1 -; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s37, s37, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[36:37] +; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[38:39] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v16, 8, v0 @@ -5190,23 +5205,23 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v16i8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0 @@ -5309,29 +5324,29 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 { ; VI-LABEL: stack_passed_arg_alignment_v32i32_f64: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s54, -1 -; VI-NEXT: s_mov_b32 s55, 0xe80000 -; VI-NEXT: s_add_u32 s52, s52, s5 +; VI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s66, -1 +; VI-NEXT: s_mov_b32 s67, 0xe80000 +; VI-NEXT: s_add_u32 s64, s64, s5 ; VI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64 ; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4 ; VI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24 ; VI-NEXT: s_mov_b32 s32, 0 -; VI-NEXT: s_addc_u32 s53, s53, 0 +; VI-NEXT: s_addc_u32 s65, s65, 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s23 -; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 +; VI-NEXT: buffer_store_dword v0, off, s[64:67], s32 ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 +; VI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:4 ; VI-NEXT: v_mov_b32_e32 v0, s5 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[52:53] -; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 +; VI-NEXT: s_mov_b64 s[0:1], s[64:65] +; VI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:8 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[54:55] +; VI-NEXT: s_mov_b64 s[2:3], s[66:67] ; VI-NEXT: v_mov_b32_e32 v0, s36 ; VI-NEXT: v_mov_b32_e32 v1, s37 ; VI-NEXT: v_mov_b32_e32 v2, s38 @@ -5368,29 +5383,29 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; ; CI-LABEL: stack_passed_arg_alignment_v32i32_f64: ; CI: ; %bb.0: ; %entry -; CI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s54, -1 -; CI-NEXT: s_mov_b32 s55, 0xe8f000 -; CI-NEXT: s_add_u32 s52, s52, s5 +; CI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s66, -1 +; CI-NEXT: s_mov_b32 s67, 0xe8f000 +; CI-NEXT: s_add_u32 s64, s64, s5 ; CI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x19 ; CI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x29 ; CI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x9 ; CI-NEXT: s_mov_b32 s32, 0 -; CI-NEXT: s_addc_u32 s53, s53, 0 +; CI-NEXT: s_addc_u32 s65, s65, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s23 -; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 +; CI-NEXT: buffer_store_dword v0, off, s[64:67], s32 ; CI-NEXT: v_mov_b32_e32 v0, s4 -; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 +; CI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:4 ; CI-NEXT: v_mov_b32_e32 v0, s5 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[52:53] -; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 +; CI-NEXT: s_mov_b64 s[0:1], s[64:65] +; CI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:8 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[54:55] +; CI-NEXT: s_mov_b64 s[2:3], s[66:67] ; CI-NEXT: v_mov_b32_e32 v0, s36 ; CI-NEXT: v_mov_b32_e32 v1, s37 ; CI-NEXT: v_mov_b32_e32 v2, s38 @@ -5427,29 +5442,29 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; ; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s54, -1 -; GFX9-NEXT: s_mov_b32 s55, 0xe00000 -; GFX9-NEXT: s_add_u32 s52, s52, s5 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s5 ; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24 ; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_addc_u32 s53, s53, 0 +; GFX9-NEXT: s_addc_u32 s65, s65, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s23 -; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 +; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 +; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:4 ; GFX9-NEXT: v_mov_b32_e32 v0, s5 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:8 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: v_mov_b32_e32 v0, s36 ; GFX9-NEXT: v_mov_b32_e32 v1, s37 ; GFX9-NEXT: v_mov_b32_e32 v2, s38 diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll index db9ce56ecc3cc..67a70cdeb1ecc 100644 --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -528,15 +528,16 @@ define void @callee_saved_sgpr_func() #2 { ; MUBUF-NEXT: s_getpc_b64 s[4:5] ; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 -; MUBUF-NEXT: v_writelane_b32 v40, s40, 2 +; MUBUF-NEXT: v_writelane_b32 v40, s34, 2 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; def s40 ; MUBUF-NEXT: ;;#ASMEND +; MUBUF-NEXT: s_mov_b32 s34, s40 ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] ; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ; use s40 +; MUBUF-NEXT: ; use s34 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s40, v40, 2 +; MUBUF-NEXT: v_readlane_b32 s34, v40, 2 ; MUBUF-NEXT: v_readlane_b32 s31, v40, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 @@ -563,15 +564,16 @@ define void @callee_saved_sgpr_func() #2 { ; FLATSCR-NEXT: s_getpc_b64 s[0:1] ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 -; FLATSCR-NEXT: v_writelane_b32 v40, s40, 2 +; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; def s40 ; FLATSCR-NEXT: ;;#ASMEND +; FLATSCR-NEXT: s_mov_b32 s34, s40 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] ; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; use s40 +; FLATSCR-NEXT: ; use s34 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s40, v40, 2 +; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2 ; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 @@ -600,9 +602,10 @@ define amdgpu_kernel void @callee_saved_sgpr_kernel() #2 { ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; def s40 ; FLATSCR-NEXT: ;;#ASMEND +; FLATSCR-NEXT: s_mov_b32 s33, s40 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] ; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; use s40 +; FLATSCR-NEXT: ; use s33 ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: s_endpgm %s40 = call i32 asm sideeffect "; def s40", "={s40}"() #0 @@ -629,22 +632,23 @@ define void @callee_saved_sgpr_vgpr_func() #2 { ; MUBUF-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; MUBUF-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; MUBUF-NEXT: v_writelane_b32 v41, s40, 2 +; MUBUF-NEXT: v_writelane_b32 v41, s34, 2 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; def s40 ; MUBUF-NEXT: ;;#ASMEND +; MUBUF-NEXT: s_mov_b32 s34, s40 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; def v40 ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] ; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ; use s40 +; MUBUF-NEXT: ; use s34 ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; use v40 ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: v_readlane_b32 s40, v41, 2 +; MUBUF-NEXT: v_readlane_b32 s34, v41, 2 ; MUBUF-NEXT: v_readlane_b32 s31, v41, 1 ; MUBUF-NEXT: v_readlane_b32 s30, v41, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 @@ -672,22 +676,23 @@ define void @callee_saved_sgpr_vgpr_func() #2 { ; FLATSCR-NEXT: s_add_u32 s0, s0, external_void_func_void@rel32@lo+4 ; FLATSCR-NEXT: s_addc_u32 s1, s1, external_void_func_void@rel32@hi+12 ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill -; FLATSCR-NEXT: v_writelane_b32 v41, s40, 2 +; FLATSCR-NEXT: v_writelane_b32 v41, s34, 2 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; def s40 ; FLATSCR-NEXT: ;;#ASMEND +; FLATSCR-NEXT: s_mov_b32 s34, s40 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; def v40 ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] ; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; use s40 +; FLATSCR-NEXT: ; use s34 ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use v40 ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload -; FLATSCR-NEXT: v_readlane_b32 s40, v41, 2 +; FLATSCR-NEXT: v_readlane_b32 s34, v41, 2 ; FLATSCR-NEXT: v_readlane_b32 s31, v41, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v41, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 @@ -718,13 +723,14 @@ define amdgpu_kernel void @callee_saved_sgpr_vgpr_kernel() #2 { ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; def s40 ; FLATSCR-NEXT: ;;#ASMEND +; FLATSCR-NEXT: s_mov_b32 s33, s40 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; def v32 ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: v_mov_b32_e32 v40, v32 ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] ; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; use s40 +; FLATSCR-NEXT: ; use s33 ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use v40 diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index 4c6f2d22080e0..002e82f676e8b 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -255,52 +255,28 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] ; MUBUF-NEXT: v_writelane_b32 v40, s36, 0 ; MUBUF-NEXT: v_writelane_b32 v40, s37, 1 -; MUBUF-NEXT: v_writelane_b32 v40, s38, 2 -; MUBUF-NEXT: v_writelane_b32 v40, s39, 3 -; MUBUF-NEXT: v_writelane_b32 v40, s40, 4 -; MUBUF-NEXT: v_writelane_b32 v40, s41, 5 -; MUBUF-NEXT: v_writelane_b32 v40, s42, 6 -; MUBUF-NEXT: v_writelane_b32 v40, s43, 7 -; MUBUF-NEXT: v_writelane_b32 v40, s44, 8 -; MUBUF-NEXT: v_writelane_b32 v40, s45, 9 -; MUBUF-NEXT: v_writelane_b32 v40, s46, 10 -; MUBUF-NEXT: v_writelane_b32 v40, s47, 11 -; MUBUF-NEXT: v_writelane_b32 v40, s48, 12 -; MUBUF-NEXT: v_writelane_b32 v40, s49, 13 -; MUBUF-NEXT: v_writelane_b32 v40, s50, 14 -; MUBUF-NEXT: v_writelane_b32 v40, s51, 15 -; MUBUF-NEXT: v_writelane_b32 v40, s52, 16 -; MUBUF-NEXT: v_writelane_b32 v40, s53, 17 -; MUBUF-NEXT: v_writelane_b32 v40, s54, 18 -; MUBUF-NEXT: v_writelane_b32 v40, s55, 19 -; MUBUF-NEXT: v_writelane_b32 v40, s56, 20 -; MUBUF-NEXT: v_writelane_b32 v40, s57, 21 -; MUBUF-NEXT: v_writelane_b32 v40, s58, 22 -; MUBUF-NEXT: v_writelane_b32 v40, s59, 23 -; MUBUF-NEXT: v_writelane_b32 v40, s60, 24 -; MUBUF-NEXT: v_writelane_b32 v40, s61, 25 -; MUBUF-NEXT: v_writelane_b32 v40, s62, 26 -; MUBUF-NEXT: v_writelane_b32 v40, s63, 27 -; MUBUF-NEXT: v_writelane_b32 v40, s64, 28 -; MUBUF-NEXT: v_writelane_b32 v40, s65, 29 -; MUBUF-NEXT: v_writelane_b32 v40, s66, 30 -; MUBUF-NEXT: v_writelane_b32 v40, s67, 31 -; MUBUF-NEXT: v_writelane_b32 v40, s68, 32 -; MUBUF-NEXT: v_writelane_b32 v40, s69, 33 -; MUBUF-NEXT: v_writelane_b32 v40, s70, 34 -; MUBUF-NEXT: v_writelane_b32 v40, s71, 35 -; MUBUF-NEXT: v_writelane_b32 v40, s72, 36 -; MUBUF-NEXT: v_writelane_b32 v40, s73, 37 -; MUBUF-NEXT: v_writelane_b32 v40, s74, 38 -; MUBUF-NEXT: v_writelane_b32 v40, s75, 39 -; MUBUF-NEXT: v_writelane_b32 v40, s76, 40 -; MUBUF-NEXT: v_writelane_b32 v40, s77, 41 -; MUBUF-NEXT: v_writelane_b32 v40, s78, 42 -; MUBUF-NEXT: v_writelane_b32 v40, s79, 43 -; MUBUF-NEXT: v_writelane_b32 v40, s80, 44 -; MUBUF-NEXT: v_writelane_b32 v40, s81, 45 -; MUBUF-NEXT: v_writelane_b32 v40, s82, 46 -; MUBUF-NEXT: v_writelane_b32 v40, s83, 47 +; MUBUF-NEXT: v_writelane_b32 v40, s46, 2 +; MUBUF-NEXT: v_writelane_b32 v40, s47, 3 +; MUBUF-NEXT: v_writelane_b32 v40, s48, 4 +; MUBUF-NEXT: v_writelane_b32 v40, s49, 5 +; MUBUF-NEXT: v_writelane_b32 v40, s50, 6 +; MUBUF-NEXT: v_writelane_b32 v40, s51, 7 +; MUBUF-NEXT: v_writelane_b32 v40, s52, 8 +; MUBUF-NEXT: v_writelane_b32 v40, s53, 9 +; MUBUF-NEXT: v_writelane_b32 v40, s62, 10 +; MUBUF-NEXT: v_writelane_b32 v40, s63, 11 +; MUBUF-NEXT: v_writelane_b32 v40, s64, 12 +; MUBUF-NEXT: v_writelane_b32 v40, s65, 13 +; MUBUF-NEXT: v_writelane_b32 v40, s66, 14 +; MUBUF-NEXT: v_writelane_b32 v40, s67, 15 +; MUBUF-NEXT: v_writelane_b32 v40, s68, 16 +; MUBUF-NEXT: v_writelane_b32 v40, s69, 17 +; MUBUF-NEXT: v_writelane_b32 v40, s78, 18 +; MUBUF-NEXT: v_writelane_b32 v40, s79, 19 +; MUBUF-NEXT: v_writelane_b32 v40, s80, 20 +; MUBUF-NEXT: v_writelane_b32 v40, s81, 21 +; MUBUF-NEXT: v_writelane_b32 v40, s82, 22 +; MUBUF-NEXT: v_writelane_b32 v40, s83, 23 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: ;;#ASMSTART @@ -347,52 +323,28 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; use s[4:19] ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s83, v40, 47 -; MUBUF-NEXT: v_readlane_b32 s82, v40, 46 -; MUBUF-NEXT: v_readlane_b32 s81, v40, 45 -; MUBUF-NEXT: v_readlane_b32 s80, v40, 44 -; MUBUF-NEXT: v_readlane_b32 s79, v40, 43 -; MUBUF-NEXT: v_readlane_b32 s78, v40, 42 -; MUBUF-NEXT: v_readlane_b32 s77, v40, 41 -; MUBUF-NEXT: v_readlane_b32 s76, v40, 40 -; MUBUF-NEXT: v_readlane_b32 s75, v40, 39 -; MUBUF-NEXT: v_readlane_b32 s74, v40, 38 -; MUBUF-NEXT: v_readlane_b32 s73, v40, 37 -; MUBUF-NEXT: v_readlane_b32 s72, v40, 36 -; MUBUF-NEXT: v_readlane_b32 s71, v40, 35 -; MUBUF-NEXT: v_readlane_b32 s70, v40, 34 -; MUBUF-NEXT: v_readlane_b32 s69, v40, 33 -; MUBUF-NEXT: v_readlane_b32 s68, v40, 32 -; MUBUF-NEXT: v_readlane_b32 s67, v40, 31 -; MUBUF-NEXT: v_readlane_b32 s66, v40, 30 -; MUBUF-NEXT: v_readlane_b32 s65, v40, 29 -; MUBUF-NEXT: v_readlane_b32 s64, v40, 28 -; MUBUF-NEXT: v_readlane_b32 s63, v40, 27 -; MUBUF-NEXT: v_readlane_b32 s62, v40, 26 -; MUBUF-NEXT: v_readlane_b32 s61, v40, 25 -; MUBUF-NEXT: v_readlane_b32 s60, v40, 24 -; MUBUF-NEXT: v_readlane_b32 s59, v40, 23 -; MUBUF-NEXT: v_readlane_b32 s58, v40, 22 -; MUBUF-NEXT: v_readlane_b32 s57, v40, 21 -; MUBUF-NEXT: v_readlane_b32 s56, v40, 20 -; MUBUF-NEXT: v_readlane_b32 s55, v40, 19 -; MUBUF-NEXT: v_readlane_b32 s54, v40, 18 -; MUBUF-NEXT: v_readlane_b32 s53, v40, 17 -; MUBUF-NEXT: v_readlane_b32 s52, v40, 16 -; MUBUF-NEXT: v_readlane_b32 s51, v40, 15 -; MUBUF-NEXT: v_readlane_b32 s50, v40, 14 -; MUBUF-NEXT: v_readlane_b32 s49, v40, 13 -; MUBUF-NEXT: v_readlane_b32 s48, v40, 12 -; MUBUF-NEXT: v_readlane_b32 s47, v40, 11 -; MUBUF-NEXT: v_readlane_b32 s46, v40, 10 -; MUBUF-NEXT: v_readlane_b32 s45, v40, 9 -; MUBUF-NEXT: v_readlane_b32 s44, v40, 8 -; MUBUF-NEXT: v_readlane_b32 s43, v40, 7 -; MUBUF-NEXT: v_readlane_b32 s42, v40, 6 -; MUBUF-NEXT: v_readlane_b32 s41, v40, 5 -; MUBUF-NEXT: v_readlane_b32 s40, v40, 4 -; MUBUF-NEXT: v_readlane_b32 s39, v40, 3 -; MUBUF-NEXT: v_readlane_b32 s38, v40, 2 +; MUBUF-NEXT: v_readlane_b32 s83, v40, 23 +; MUBUF-NEXT: v_readlane_b32 s82, v40, 22 +; MUBUF-NEXT: v_readlane_b32 s81, v40, 21 +; MUBUF-NEXT: v_readlane_b32 s80, v40, 20 +; MUBUF-NEXT: v_readlane_b32 s79, v40, 19 +; MUBUF-NEXT: v_readlane_b32 s78, v40, 18 +; MUBUF-NEXT: v_readlane_b32 s69, v40, 17 +; MUBUF-NEXT: v_readlane_b32 s68, v40, 16 +; MUBUF-NEXT: v_readlane_b32 s67, v40, 15 +; MUBUF-NEXT: v_readlane_b32 s66, v40, 14 +; MUBUF-NEXT: v_readlane_b32 s65, v40, 13 +; MUBUF-NEXT: v_readlane_b32 s64, v40, 12 +; MUBUF-NEXT: v_readlane_b32 s63, v40, 11 +; MUBUF-NEXT: v_readlane_b32 s62, v40, 10 +; MUBUF-NEXT: v_readlane_b32 s53, v40, 9 +; MUBUF-NEXT: v_readlane_b32 s52, v40, 8 +; MUBUF-NEXT: v_readlane_b32 s51, v40, 7 +; MUBUF-NEXT: v_readlane_b32 s50, v40, 6 +; MUBUF-NEXT: v_readlane_b32 s49, v40, 5 +; MUBUF-NEXT: v_readlane_b32 s48, v40, 4 +; MUBUF-NEXT: v_readlane_b32 s47, v40, 3 +; MUBUF-NEXT: v_readlane_b32 s46, v40, 2 ; MUBUF-NEXT: v_readlane_b32 s37, v40, 1 ; MUBUF-NEXT: v_readlane_b32 s36, v40, 0 ; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -409,48 +361,24 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] ; FLATSCR-NEXT: v_writelane_b32 v40, s30, 0 ; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 -; FLATSCR-NEXT: v_writelane_b32 v40, s34, 2 -; FLATSCR-NEXT: v_writelane_b32 v40, s35, 3 -; FLATSCR-NEXT: v_writelane_b32 v40, s36, 4 -; FLATSCR-NEXT: v_writelane_b32 v40, s37, 5 -; FLATSCR-NEXT: v_writelane_b32 v40, s38, 6 -; FLATSCR-NEXT: v_writelane_b32 v40, s39, 7 -; FLATSCR-NEXT: v_writelane_b32 v40, s40, 8 -; FLATSCR-NEXT: v_writelane_b32 v40, s41, 9 -; FLATSCR-NEXT: v_writelane_b32 v40, s42, 10 -; FLATSCR-NEXT: v_writelane_b32 v40, s43, 11 -; FLATSCR-NEXT: v_writelane_b32 v40, s44, 12 -; FLATSCR-NEXT: v_writelane_b32 v40, s45, 13 -; FLATSCR-NEXT: v_writelane_b32 v40, s46, 14 -; FLATSCR-NEXT: v_writelane_b32 v40, s47, 15 -; FLATSCR-NEXT: v_writelane_b32 v40, s48, 16 -; FLATSCR-NEXT: v_writelane_b32 v40, s49, 17 -; FLATSCR-NEXT: v_writelane_b32 v40, s50, 18 -; FLATSCR-NEXT: v_writelane_b32 v40, s51, 19 -; FLATSCR-NEXT: v_writelane_b32 v40, s52, 20 -; FLATSCR-NEXT: v_writelane_b32 v40, s53, 21 -; FLATSCR-NEXT: v_writelane_b32 v40, s54, 22 -; FLATSCR-NEXT: v_writelane_b32 v40, s55, 23 -; FLATSCR-NEXT: v_writelane_b32 v40, s56, 24 -; FLATSCR-NEXT: v_writelane_b32 v40, s57, 25 -; FLATSCR-NEXT: v_writelane_b32 v40, s58, 26 -; FLATSCR-NEXT: v_writelane_b32 v40, s59, 27 -; FLATSCR-NEXT: v_writelane_b32 v40, s60, 28 -; FLATSCR-NEXT: v_writelane_b32 v40, s61, 29 -; FLATSCR-NEXT: v_writelane_b32 v40, s62, 30 -; FLATSCR-NEXT: v_writelane_b32 v40, s63, 31 -; FLATSCR-NEXT: v_writelane_b32 v40, s64, 32 -; FLATSCR-NEXT: v_writelane_b32 v40, s65, 33 -; FLATSCR-NEXT: v_writelane_b32 v40, s66, 34 -; FLATSCR-NEXT: v_writelane_b32 v40, s67, 35 -; FLATSCR-NEXT: v_writelane_b32 v40, s68, 36 -; FLATSCR-NEXT: v_writelane_b32 v40, s69, 37 -; FLATSCR-NEXT: v_writelane_b32 v40, s70, 38 -; FLATSCR-NEXT: v_writelane_b32 v40, s71, 39 -; FLATSCR-NEXT: v_writelane_b32 v40, s72, 40 -; FLATSCR-NEXT: v_writelane_b32 v40, s73, 41 -; FLATSCR-NEXT: v_writelane_b32 v40, s74, 42 -; FLATSCR-NEXT: v_writelane_b32 v40, s75, 43 +; FLATSCR-NEXT: v_writelane_b32 v40, s36, 2 +; FLATSCR-NEXT: v_writelane_b32 v40, s37, 3 +; FLATSCR-NEXT: v_writelane_b32 v40, s46, 4 +; FLATSCR-NEXT: v_writelane_b32 v40, s47, 5 +; FLATSCR-NEXT: v_writelane_b32 v40, s48, 6 +; FLATSCR-NEXT: v_writelane_b32 v40, s49, 7 +; FLATSCR-NEXT: v_writelane_b32 v40, s50, 8 +; FLATSCR-NEXT: v_writelane_b32 v40, s51, 9 +; FLATSCR-NEXT: v_writelane_b32 v40, s52, 10 +; FLATSCR-NEXT: v_writelane_b32 v40, s53, 11 +; FLATSCR-NEXT: v_writelane_b32 v40, s62, 12 +; FLATSCR-NEXT: v_writelane_b32 v40, s63, 13 +; FLATSCR-NEXT: v_writelane_b32 v40, s64, 14 +; FLATSCR-NEXT: v_writelane_b32 v40, s65, 15 +; FLATSCR-NEXT: v_writelane_b32 v40, s66, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s67, 17 +; FLATSCR-NEXT: v_writelane_b32 v40, s68, 18 +; FLATSCR-NEXT: v_writelane_b32 v40, s69, 19 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART @@ -477,7 +405,7 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; FLATSCR-NEXT: ; def s[68:75] ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; def s[34:35] +; FLATSCR-NEXT: ; def s[76:77] ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s[52:67] @@ -492,53 +420,29 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; FLATSCR-NEXT: ; use s[68:75] ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; use s[34:35] +; FLATSCR-NEXT: ; use s[76:77] ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s[0:15] ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s75, v40, 43 -; FLATSCR-NEXT: v_readlane_b32 s74, v40, 42 -; FLATSCR-NEXT: v_readlane_b32 s73, v40, 41 -; FLATSCR-NEXT: v_readlane_b32 s72, v40, 40 -; FLATSCR-NEXT: v_readlane_b32 s71, v40, 39 -; FLATSCR-NEXT: v_readlane_b32 s70, v40, 38 -; FLATSCR-NEXT: v_readlane_b32 s69, v40, 37 -; FLATSCR-NEXT: v_readlane_b32 s68, v40, 36 -; FLATSCR-NEXT: v_readlane_b32 s67, v40, 35 -; FLATSCR-NEXT: v_readlane_b32 s66, v40, 34 -; FLATSCR-NEXT: v_readlane_b32 s65, v40, 33 -; FLATSCR-NEXT: v_readlane_b32 s64, v40, 32 -; FLATSCR-NEXT: v_readlane_b32 s63, v40, 31 -; FLATSCR-NEXT: v_readlane_b32 s62, v40, 30 -; FLATSCR-NEXT: v_readlane_b32 s61, v40, 29 -; FLATSCR-NEXT: v_readlane_b32 s60, v40, 28 -; FLATSCR-NEXT: v_readlane_b32 s59, v40, 27 -; FLATSCR-NEXT: v_readlane_b32 s58, v40, 26 -; FLATSCR-NEXT: v_readlane_b32 s57, v40, 25 -; FLATSCR-NEXT: v_readlane_b32 s56, v40, 24 -; FLATSCR-NEXT: v_readlane_b32 s55, v40, 23 -; FLATSCR-NEXT: v_readlane_b32 s54, v40, 22 -; FLATSCR-NEXT: v_readlane_b32 s53, v40, 21 -; FLATSCR-NEXT: v_readlane_b32 s52, v40, 20 -; FLATSCR-NEXT: v_readlane_b32 s51, v40, 19 -; FLATSCR-NEXT: v_readlane_b32 s50, v40, 18 -; FLATSCR-NEXT: v_readlane_b32 s49, v40, 17 -; FLATSCR-NEXT: v_readlane_b32 s48, v40, 16 -; FLATSCR-NEXT: v_readlane_b32 s47, v40, 15 -; FLATSCR-NEXT: v_readlane_b32 s46, v40, 14 -; FLATSCR-NEXT: v_readlane_b32 s45, v40, 13 -; FLATSCR-NEXT: v_readlane_b32 s44, v40, 12 -; FLATSCR-NEXT: v_readlane_b32 s43, v40, 11 -; FLATSCR-NEXT: v_readlane_b32 s42, v40, 10 -; FLATSCR-NEXT: v_readlane_b32 s41, v40, 9 -; FLATSCR-NEXT: v_readlane_b32 s40, v40, 8 -; FLATSCR-NEXT: v_readlane_b32 s39, v40, 7 -; FLATSCR-NEXT: v_readlane_b32 s38, v40, 6 -; FLATSCR-NEXT: v_readlane_b32 s37, v40, 5 -; FLATSCR-NEXT: v_readlane_b32 s36, v40, 4 -; FLATSCR-NEXT: v_readlane_b32 s35, v40, 3 -; FLATSCR-NEXT: v_readlane_b32 s34, v40, 2 +; FLATSCR-NEXT: v_readlane_b32 s69, v40, 19 +; FLATSCR-NEXT: v_readlane_b32 s68, v40, 18 +; FLATSCR-NEXT: v_readlane_b32 s67, v40, 17 +; FLATSCR-NEXT: v_readlane_b32 s66, v40, 16 +; FLATSCR-NEXT: v_readlane_b32 s65, v40, 15 +; FLATSCR-NEXT: v_readlane_b32 s64, v40, 14 +; FLATSCR-NEXT: v_readlane_b32 s63, v40, 13 +; FLATSCR-NEXT: v_readlane_b32 s62, v40, 12 +; FLATSCR-NEXT: v_readlane_b32 s53, v40, 11 +; FLATSCR-NEXT: v_readlane_b32 s52, v40, 10 +; FLATSCR-NEXT: v_readlane_b32 s51, v40, 9 +; FLATSCR-NEXT: v_readlane_b32 s50, v40, 8 +; FLATSCR-NEXT: v_readlane_b32 s49, v40, 7 +; FLATSCR-NEXT: v_readlane_b32 s48, v40, 6 +; FLATSCR-NEXT: v_readlane_b32 s47, v40, 5 +; FLATSCR-NEXT: v_readlane_b32 s46, v40, 4 +; FLATSCR-NEXT: v_readlane_b32 s37, v40, 3 +; FLATSCR-NEXT: v_readlane_b32 s36, v40, 2 ; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 ; FLATSCR-NEXT: v_readlane_b32 s30, v40, 0 ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -571,39 +475,13 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; Has no spilled CSR VGPRs used for SGPR spilling, so no need to ; enable all lanes and restore. define void @spill_only_csr_sgpr() { -; MUBUF-LABEL: spill_only_csr_sgpr: -; MUBUF: ; %bb.0: -; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill -; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: v_writelane_b32 v0, s42, 0 -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ; clobber s42 -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s42, v0, 0 -; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_setpc_b64 s[30:31] -; -; FLATSCR-LABEL: spill_only_csr_sgpr: -; FLATSCR: ; %bb.0: -; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; FLATSCR-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill -; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: v_writelane_b32 v0, s42, 0 -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; clobber s42 -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s42, v0, 0 -; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; FLATSCR-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: spill_only_csr_sgpr: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; clobber s42 +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_setpc_b64 s[30:31] call void asm sideeffect "; clobber s42", "~{s42}"() ret void } @@ -663,143 +541,83 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] -; MUBUF-NEXT: v_writelane_b32 v1, s40, 0 -; MUBUF-NEXT: v_writelane_b32 v1, s41, 1 -; MUBUF-NEXT: v_writelane_b32 v1, s42, 2 -; MUBUF-NEXT: v_writelane_b32 v1, s43, 3 -; MUBUF-NEXT: v_writelane_b32 v1, s44, 4 -; MUBUF-NEXT: v_writelane_b32 v1, s45, 5 -; MUBUF-NEXT: v_writelane_b32 v1, s46, 6 -; MUBUF-NEXT: v_writelane_b32 v1, s47, 7 -; MUBUF-NEXT: v_writelane_b32 v1, s48, 8 -; MUBUF-NEXT: v_writelane_b32 v1, s49, 9 -; MUBUF-NEXT: v_writelane_b32 v1, s50, 10 -; MUBUF-NEXT: v_writelane_b32 v1, s51, 11 -; MUBUF-NEXT: v_writelane_b32 v1, s52, 12 -; MUBUF-NEXT: v_writelane_b32 v1, s53, 13 -; MUBUF-NEXT: v_writelane_b32 v1, s54, 14 -; MUBUF-NEXT: v_writelane_b32 v1, s55, 15 -; MUBUF-NEXT: v_writelane_b32 v1, s56, 16 -; MUBUF-NEXT: v_writelane_b32 v1, s57, 17 -; MUBUF-NEXT: v_writelane_b32 v1, s58, 18 -; MUBUF-NEXT: v_writelane_b32 v1, s59, 19 -; MUBUF-NEXT: v_writelane_b32 v1, s60, 20 -; MUBUF-NEXT: v_writelane_b32 v1, s61, 21 -; MUBUF-NEXT: v_writelane_b32 v1, s62, 22 -; MUBUF-NEXT: v_writelane_b32 v1, s63, 23 -; MUBUF-NEXT: v_writelane_b32 v1, s64, 24 -; MUBUF-NEXT: v_writelane_b32 v1, s65, 25 -; MUBUF-NEXT: v_writelane_b32 v1, s66, 26 -; MUBUF-NEXT: v_writelane_b32 v1, s67, 27 -; MUBUF-NEXT: v_writelane_b32 v1, s68, 28 -; MUBUF-NEXT: v_writelane_b32 v1, s69, 29 -; MUBUF-NEXT: v_writelane_b32 v1, s70, 30 -; MUBUF-NEXT: v_writelane_b32 v1, s71, 31 -; MUBUF-NEXT: v_writelane_b32 v1, s72, 32 -; MUBUF-NEXT: v_writelane_b32 v1, s73, 33 -; MUBUF-NEXT: v_writelane_b32 v1, s74, 34 -; MUBUF-NEXT: v_writelane_b32 v1, s75, 35 -; MUBUF-NEXT: v_writelane_b32 v1, s76, 36 -; MUBUF-NEXT: v_writelane_b32 v1, s77, 37 -; MUBUF-NEXT: v_writelane_b32 v1, s78, 38 -; MUBUF-NEXT: v_writelane_b32 v1, s79, 39 -; MUBUF-NEXT: v_writelane_b32 v1, s80, 40 -; MUBUF-NEXT: v_writelane_b32 v1, s81, 41 -; MUBUF-NEXT: v_writelane_b32 v1, s82, 42 -; MUBUF-NEXT: v_writelane_b32 v1, s83, 43 -; MUBUF-NEXT: v_writelane_b32 v1, s84, 44 -; MUBUF-NEXT: v_writelane_b32 v1, s85, 45 -; MUBUF-NEXT: v_writelane_b32 v1, s86, 46 -; MUBUF-NEXT: v_writelane_b32 v1, s87, 47 -; MUBUF-NEXT: v_writelane_b32 v1, s88, 48 -; MUBUF-NEXT: v_writelane_b32 v1, s89, 49 -; MUBUF-NEXT: v_writelane_b32 v1, s90, 50 -; MUBUF-NEXT: v_writelane_b32 v1, s91, 51 -; MUBUF-NEXT: v_writelane_b32 v1, s92, 52 -; MUBUF-NEXT: v_writelane_b32 v1, s93, 53 -; MUBUF-NEXT: v_writelane_b32 v1, s94, 54 -; MUBUF-NEXT: v_writelane_b32 v1, s95, 55 -; MUBUF-NEXT: v_writelane_b32 v1, s96, 56 -; MUBUF-NEXT: v_writelane_b32 v1, s97, 57 -; MUBUF-NEXT: v_writelane_b32 v1, s98, 58 -; MUBUF-NEXT: v_writelane_b32 v1, s99, 59 -; MUBUF-NEXT: v_writelane_b32 v1, s100, 60 +; MUBUF-NEXT: v_writelane_b32 v1, s46, 0 +; MUBUF-NEXT: v_writelane_b32 v1, s47, 1 +; MUBUF-NEXT: v_writelane_b32 v1, s48, 2 +; MUBUF-NEXT: v_writelane_b32 v1, s49, 3 +; MUBUF-NEXT: v_writelane_b32 v1, s50, 4 +; MUBUF-NEXT: v_writelane_b32 v1, s51, 5 +; MUBUF-NEXT: v_writelane_b32 v1, s52, 6 +; MUBUF-NEXT: v_writelane_b32 v1, s53, 7 +; MUBUF-NEXT: v_writelane_b32 v1, s62, 8 +; MUBUF-NEXT: v_writelane_b32 v1, s63, 9 +; MUBUF-NEXT: v_writelane_b32 v1, s64, 10 +; MUBUF-NEXT: v_writelane_b32 v1, s65, 11 +; MUBUF-NEXT: v_writelane_b32 v1, s66, 12 +; MUBUF-NEXT: v_writelane_b32 v1, s67, 13 +; MUBUF-NEXT: v_writelane_b32 v1, s68, 14 +; MUBUF-NEXT: v_writelane_b32 v1, s69, 15 +; MUBUF-NEXT: v_writelane_b32 v1, s78, 16 +; MUBUF-NEXT: v_writelane_b32 v1, s79, 17 +; MUBUF-NEXT: v_writelane_b32 v1, s80, 18 +; MUBUF-NEXT: v_writelane_b32 v1, s81, 19 +; MUBUF-NEXT: v_writelane_b32 v1, s82, 20 +; MUBUF-NEXT: v_writelane_b32 v1, s83, 21 +; MUBUF-NEXT: v_writelane_b32 v1, s84, 22 +; MUBUF-NEXT: v_writelane_b32 v1, s85, 23 +; MUBUF-NEXT: v_writelane_b32 v1, s94, 24 +; MUBUF-NEXT: v_writelane_b32 v1, s95, 25 +; MUBUF-NEXT: v_writelane_b32 v1, s96, 26 +; MUBUF-NEXT: v_writelane_b32 v1, s97, 27 +; MUBUF-NEXT: v_writelane_b32 v1, s98, 28 +; MUBUF-NEXT: v_writelane_b32 v1, s99, 29 +; MUBUF-NEXT: v_writelane_b32 v1, s100, 30 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; MUBUF-NEXT: v_writelane_b32 v1, s101, 61 +; MUBUF-NEXT: v_writelane_b32 v1, s101, 31 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber v41 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_writelane_b32 v1, s102, 62 +; MUBUF-NEXT: v_writelane_b32 v1, s102, 32 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_readlane_b32 s102, v1, 62 -; MUBUF-NEXT: v_readlane_b32 s101, v1, 61 -; MUBUF-NEXT: v_readlane_b32 s100, v1, 60 -; MUBUF-NEXT: v_readlane_b32 s99, v1, 59 -; MUBUF-NEXT: v_readlane_b32 s98, v1, 58 -; MUBUF-NEXT: v_readlane_b32 s97, v1, 57 -; MUBUF-NEXT: v_readlane_b32 s96, v1, 56 -; MUBUF-NEXT: v_readlane_b32 s95, v1, 55 -; MUBUF-NEXT: v_readlane_b32 s94, v1, 54 -; MUBUF-NEXT: v_readlane_b32 s93, v1, 53 -; MUBUF-NEXT: v_readlane_b32 s92, v1, 52 -; MUBUF-NEXT: v_readlane_b32 s91, v1, 51 -; MUBUF-NEXT: v_readlane_b32 s90, v1, 50 -; MUBUF-NEXT: v_readlane_b32 s89, v1, 49 -; MUBUF-NEXT: v_readlane_b32 s88, v1, 48 -; MUBUF-NEXT: v_readlane_b32 s87, v1, 47 -; MUBUF-NEXT: v_readlane_b32 s86, v1, 46 -; MUBUF-NEXT: v_readlane_b32 s85, v1, 45 -; MUBUF-NEXT: v_readlane_b32 s84, v1, 44 -; MUBUF-NEXT: v_readlane_b32 s83, v1, 43 -; MUBUF-NEXT: v_readlane_b32 s82, v1, 42 -; MUBUF-NEXT: v_readlane_b32 s81, v1, 41 -; MUBUF-NEXT: v_readlane_b32 s80, v1, 40 -; MUBUF-NEXT: v_readlane_b32 s79, v1, 39 -; MUBUF-NEXT: v_readlane_b32 s78, v1, 38 -; MUBUF-NEXT: v_readlane_b32 s77, v1, 37 -; MUBUF-NEXT: v_readlane_b32 s76, v1, 36 -; MUBUF-NEXT: v_readlane_b32 s75, v1, 35 -; MUBUF-NEXT: v_readlane_b32 s74, v1, 34 -; MUBUF-NEXT: v_readlane_b32 s73, v1, 33 -; MUBUF-NEXT: v_readlane_b32 s72, v1, 32 -; MUBUF-NEXT: v_readlane_b32 s71, v1, 31 -; MUBUF-NEXT: v_readlane_b32 s70, v1, 30 -; MUBUF-NEXT: v_readlane_b32 s69, v1, 29 -; MUBUF-NEXT: v_readlane_b32 s68, v1, 28 -; MUBUF-NEXT: v_readlane_b32 s67, v1, 27 -; MUBUF-NEXT: v_readlane_b32 s66, v1, 26 -; MUBUF-NEXT: v_readlane_b32 s65, v1, 25 -; MUBUF-NEXT: v_readlane_b32 s64, v1, 24 -; MUBUF-NEXT: v_readlane_b32 s63, v1, 23 -; MUBUF-NEXT: v_readlane_b32 s62, v1, 22 -; MUBUF-NEXT: v_readlane_b32 s61, v1, 21 -; MUBUF-NEXT: v_readlane_b32 s60, v1, 20 -; MUBUF-NEXT: v_readlane_b32 s59, v1, 19 -; MUBUF-NEXT: v_readlane_b32 s58, v1, 18 -; MUBUF-NEXT: v_readlane_b32 s57, v1, 17 -; MUBUF-NEXT: v_readlane_b32 s56, v1, 16 -; MUBUF-NEXT: v_readlane_b32 s55, v1, 15 -; MUBUF-NEXT: v_readlane_b32 s54, v1, 14 -; MUBUF-NEXT: v_readlane_b32 s53, v1, 13 -; MUBUF-NEXT: v_readlane_b32 s52, v1, 12 -; MUBUF-NEXT: v_readlane_b32 s51, v1, 11 -; MUBUF-NEXT: v_readlane_b32 s50, v1, 10 -; MUBUF-NEXT: v_readlane_b32 s49, v1, 9 -; MUBUF-NEXT: v_readlane_b32 s48, v1, 8 -; MUBUF-NEXT: v_readlane_b32 s47, v1, 7 -; MUBUF-NEXT: v_readlane_b32 s46, v1, 6 -; MUBUF-NEXT: v_readlane_b32 s45, v1, 5 -; MUBUF-NEXT: v_readlane_b32 s44, v1, 4 -; MUBUF-NEXT: v_readlane_b32 s43, v1, 3 -; MUBUF-NEXT: v_readlane_b32 s42, v1, 2 -; MUBUF-NEXT: v_readlane_b32 s41, v1, 1 -; MUBUF-NEXT: v_readlane_b32 s40, v1, 0 +; MUBUF-NEXT: v_readlane_b32 s102, v1, 32 +; MUBUF-NEXT: v_readlane_b32 s101, v1, 31 +; MUBUF-NEXT: v_readlane_b32 s100, v1, 30 +; MUBUF-NEXT: v_readlane_b32 s99, v1, 29 +; MUBUF-NEXT: v_readlane_b32 s98, v1, 28 +; MUBUF-NEXT: v_readlane_b32 s97, v1, 27 +; MUBUF-NEXT: v_readlane_b32 s96, v1, 26 +; MUBUF-NEXT: v_readlane_b32 s95, v1, 25 +; MUBUF-NEXT: v_readlane_b32 s94, v1, 24 +; MUBUF-NEXT: v_readlane_b32 s85, v1, 23 +; MUBUF-NEXT: v_readlane_b32 s84, v1, 22 +; MUBUF-NEXT: v_readlane_b32 s83, v1, 21 +; MUBUF-NEXT: v_readlane_b32 s82, v1, 20 +; MUBUF-NEXT: v_readlane_b32 s81, v1, 19 +; MUBUF-NEXT: v_readlane_b32 s80, v1, 18 +; MUBUF-NEXT: v_readlane_b32 s79, v1, 17 +; MUBUF-NEXT: v_readlane_b32 s78, v1, 16 +; MUBUF-NEXT: v_readlane_b32 s69, v1, 15 +; MUBUF-NEXT: v_readlane_b32 s68, v1, 14 +; MUBUF-NEXT: v_readlane_b32 s67, v1, 13 +; MUBUF-NEXT: v_readlane_b32 s66, v1, 12 +; MUBUF-NEXT: v_readlane_b32 s65, v1, 11 +; MUBUF-NEXT: v_readlane_b32 s64, v1, 10 +; MUBUF-NEXT: v_readlane_b32 s63, v1, 9 +; MUBUF-NEXT: v_readlane_b32 s62, v1, 8 +; MUBUF-NEXT: v_readlane_b32 s53, v1, 7 +; MUBUF-NEXT: v_readlane_b32 s52, v1, 6 +; MUBUF-NEXT: v_readlane_b32 s51, v1, 5 +; MUBUF-NEXT: v_readlane_b32 s50, v1, 4 +; MUBUF-NEXT: v_readlane_b32 s49, v1, 3 +; MUBUF-NEXT: v_readlane_b32 s48, v1, 2 +; MUBUF-NEXT: v_readlane_b32 s47, v1, 1 +; MUBUF-NEXT: v_readlane_b32 s46, v1, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload @@ -816,143 +634,83 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:8 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: v_writelane_b32 v1, s40, 0 -; FLATSCR-NEXT: v_writelane_b32 v1, s41, 1 -; FLATSCR-NEXT: v_writelane_b32 v1, s42, 2 -; FLATSCR-NEXT: v_writelane_b32 v1, s43, 3 -; FLATSCR-NEXT: v_writelane_b32 v1, s44, 4 -; FLATSCR-NEXT: v_writelane_b32 v1, s45, 5 -; FLATSCR-NEXT: v_writelane_b32 v1, s46, 6 -; FLATSCR-NEXT: v_writelane_b32 v1, s47, 7 -; FLATSCR-NEXT: v_writelane_b32 v1, s48, 8 -; FLATSCR-NEXT: v_writelane_b32 v1, s49, 9 -; FLATSCR-NEXT: v_writelane_b32 v1, s50, 10 -; FLATSCR-NEXT: v_writelane_b32 v1, s51, 11 -; FLATSCR-NEXT: v_writelane_b32 v1, s52, 12 -; FLATSCR-NEXT: v_writelane_b32 v1, s53, 13 -; FLATSCR-NEXT: v_writelane_b32 v1, s54, 14 -; FLATSCR-NEXT: v_writelane_b32 v1, s55, 15 -; FLATSCR-NEXT: v_writelane_b32 v1, s56, 16 -; FLATSCR-NEXT: v_writelane_b32 v1, s57, 17 -; FLATSCR-NEXT: v_writelane_b32 v1, s58, 18 -; FLATSCR-NEXT: v_writelane_b32 v1, s59, 19 -; FLATSCR-NEXT: v_writelane_b32 v1, s60, 20 -; FLATSCR-NEXT: v_writelane_b32 v1, s61, 21 -; FLATSCR-NEXT: v_writelane_b32 v1, s62, 22 -; FLATSCR-NEXT: v_writelane_b32 v1, s63, 23 -; FLATSCR-NEXT: v_writelane_b32 v1, s64, 24 -; FLATSCR-NEXT: v_writelane_b32 v1, s65, 25 -; FLATSCR-NEXT: v_writelane_b32 v1, s66, 26 -; FLATSCR-NEXT: v_writelane_b32 v1, s67, 27 -; FLATSCR-NEXT: v_writelane_b32 v1, s68, 28 -; FLATSCR-NEXT: v_writelane_b32 v1, s69, 29 -; FLATSCR-NEXT: v_writelane_b32 v1, s70, 30 -; FLATSCR-NEXT: v_writelane_b32 v1, s71, 31 -; FLATSCR-NEXT: v_writelane_b32 v1, s72, 32 -; FLATSCR-NEXT: v_writelane_b32 v1, s73, 33 -; FLATSCR-NEXT: v_writelane_b32 v1, s74, 34 -; FLATSCR-NEXT: v_writelane_b32 v1, s75, 35 -; FLATSCR-NEXT: v_writelane_b32 v1, s76, 36 -; FLATSCR-NEXT: v_writelane_b32 v1, s77, 37 -; FLATSCR-NEXT: v_writelane_b32 v1, s78, 38 -; FLATSCR-NEXT: v_writelane_b32 v1, s79, 39 -; FLATSCR-NEXT: v_writelane_b32 v1, s80, 40 -; FLATSCR-NEXT: v_writelane_b32 v1, s81, 41 -; FLATSCR-NEXT: v_writelane_b32 v1, s82, 42 -; FLATSCR-NEXT: v_writelane_b32 v1, s83, 43 -; FLATSCR-NEXT: v_writelane_b32 v1, s84, 44 -; FLATSCR-NEXT: v_writelane_b32 v1, s85, 45 -; FLATSCR-NEXT: v_writelane_b32 v1, s86, 46 -; FLATSCR-NEXT: v_writelane_b32 v1, s87, 47 -; FLATSCR-NEXT: v_writelane_b32 v1, s88, 48 -; FLATSCR-NEXT: v_writelane_b32 v1, s89, 49 -; FLATSCR-NEXT: v_writelane_b32 v1, s90, 50 -; FLATSCR-NEXT: v_writelane_b32 v1, s91, 51 -; FLATSCR-NEXT: v_writelane_b32 v1, s92, 52 -; FLATSCR-NEXT: v_writelane_b32 v1, s93, 53 -; FLATSCR-NEXT: v_writelane_b32 v1, s94, 54 -; FLATSCR-NEXT: v_writelane_b32 v1, s95, 55 -; FLATSCR-NEXT: v_writelane_b32 v1, s96, 56 -; FLATSCR-NEXT: v_writelane_b32 v1, s97, 57 -; FLATSCR-NEXT: v_writelane_b32 v1, s98, 58 -; FLATSCR-NEXT: v_writelane_b32 v1, s99, 59 -; FLATSCR-NEXT: v_writelane_b32 v1, s100, 60 +; FLATSCR-NEXT: v_writelane_b32 v1, s46, 0 +; FLATSCR-NEXT: v_writelane_b32 v1, s47, 1 +; FLATSCR-NEXT: v_writelane_b32 v1, s48, 2 +; FLATSCR-NEXT: v_writelane_b32 v1, s49, 3 +; FLATSCR-NEXT: v_writelane_b32 v1, s50, 4 +; FLATSCR-NEXT: v_writelane_b32 v1, s51, 5 +; FLATSCR-NEXT: v_writelane_b32 v1, s52, 6 +; FLATSCR-NEXT: v_writelane_b32 v1, s53, 7 +; FLATSCR-NEXT: v_writelane_b32 v1, s62, 8 +; FLATSCR-NEXT: v_writelane_b32 v1, s63, 9 +; FLATSCR-NEXT: v_writelane_b32 v1, s64, 10 +; FLATSCR-NEXT: v_writelane_b32 v1, s65, 11 +; FLATSCR-NEXT: v_writelane_b32 v1, s66, 12 +; FLATSCR-NEXT: v_writelane_b32 v1, s67, 13 +; FLATSCR-NEXT: v_writelane_b32 v1, s68, 14 +; FLATSCR-NEXT: v_writelane_b32 v1, s69, 15 +; FLATSCR-NEXT: v_writelane_b32 v1, s78, 16 +; FLATSCR-NEXT: v_writelane_b32 v1, s79, 17 +; FLATSCR-NEXT: v_writelane_b32 v1, s80, 18 +; FLATSCR-NEXT: v_writelane_b32 v1, s81, 19 +; FLATSCR-NEXT: v_writelane_b32 v1, s82, 20 +; FLATSCR-NEXT: v_writelane_b32 v1, s83, 21 +; FLATSCR-NEXT: v_writelane_b32 v1, s84, 22 +; FLATSCR-NEXT: v_writelane_b32 v1, s85, 23 +; FLATSCR-NEXT: v_writelane_b32 v1, s94, 24 +; FLATSCR-NEXT: v_writelane_b32 v1, s95, 25 +; FLATSCR-NEXT: v_writelane_b32 v1, s96, 26 +; FLATSCR-NEXT: v_writelane_b32 v1, s97, 27 +; FLATSCR-NEXT: v_writelane_b32 v1, s98, 28 +; FLATSCR-NEXT: v_writelane_b32 v1, s99, 29 +; FLATSCR-NEXT: v_writelane_b32 v1, s100, 30 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill -; FLATSCR-NEXT: v_writelane_b32 v1, s101, 61 +; FLATSCR-NEXT: v_writelane_b32 v1, s101, 31 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber v41 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_writelane_b32 v1, s102, 62 +; FLATSCR-NEXT: v_writelane_b32 v1, s102, 32 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 -; FLATSCR-NEXT: v_readlane_b32 s102, v1, 62 -; FLATSCR-NEXT: v_readlane_b32 s101, v1, 61 -; FLATSCR-NEXT: v_readlane_b32 s100, v1, 60 -; FLATSCR-NEXT: v_readlane_b32 s99, v1, 59 -; FLATSCR-NEXT: v_readlane_b32 s98, v1, 58 -; FLATSCR-NEXT: v_readlane_b32 s97, v1, 57 -; FLATSCR-NEXT: v_readlane_b32 s96, v1, 56 -; FLATSCR-NEXT: v_readlane_b32 s95, v1, 55 -; FLATSCR-NEXT: v_readlane_b32 s94, v1, 54 -; FLATSCR-NEXT: v_readlane_b32 s93, v1, 53 -; FLATSCR-NEXT: v_readlane_b32 s92, v1, 52 -; FLATSCR-NEXT: v_readlane_b32 s91, v1, 51 -; FLATSCR-NEXT: v_readlane_b32 s90, v1, 50 -; FLATSCR-NEXT: v_readlane_b32 s89, v1, 49 -; FLATSCR-NEXT: v_readlane_b32 s88, v1, 48 -; FLATSCR-NEXT: v_readlane_b32 s87, v1, 47 -; FLATSCR-NEXT: v_readlane_b32 s86, v1, 46 -; FLATSCR-NEXT: v_readlane_b32 s85, v1, 45 -; FLATSCR-NEXT: v_readlane_b32 s84, v1, 44 -; FLATSCR-NEXT: v_readlane_b32 s83, v1, 43 -; FLATSCR-NEXT: v_readlane_b32 s82, v1, 42 -; FLATSCR-NEXT: v_readlane_b32 s81, v1, 41 -; FLATSCR-NEXT: v_readlane_b32 s80, v1, 40 -; FLATSCR-NEXT: v_readlane_b32 s79, v1, 39 -; FLATSCR-NEXT: v_readlane_b32 s78, v1, 38 -; FLATSCR-NEXT: v_readlane_b32 s77, v1, 37 -; FLATSCR-NEXT: v_readlane_b32 s76, v1, 36 -; FLATSCR-NEXT: v_readlane_b32 s75, v1, 35 -; FLATSCR-NEXT: v_readlane_b32 s74, v1, 34 -; FLATSCR-NEXT: v_readlane_b32 s73, v1, 33 -; FLATSCR-NEXT: v_readlane_b32 s72, v1, 32 -; FLATSCR-NEXT: v_readlane_b32 s71, v1, 31 -; FLATSCR-NEXT: v_readlane_b32 s70, v1, 30 -; FLATSCR-NEXT: v_readlane_b32 s69, v1, 29 -; FLATSCR-NEXT: v_readlane_b32 s68, v1, 28 -; FLATSCR-NEXT: v_readlane_b32 s67, v1, 27 -; FLATSCR-NEXT: v_readlane_b32 s66, v1, 26 -; FLATSCR-NEXT: v_readlane_b32 s65, v1, 25 -; FLATSCR-NEXT: v_readlane_b32 s64, v1, 24 -; FLATSCR-NEXT: v_readlane_b32 s63, v1, 23 -; FLATSCR-NEXT: v_readlane_b32 s62, v1, 22 -; FLATSCR-NEXT: v_readlane_b32 s61, v1, 21 -; FLATSCR-NEXT: v_readlane_b32 s60, v1, 20 -; FLATSCR-NEXT: v_readlane_b32 s59, v1, 19 -; FLATSCR-NEXT: v_readlane_b32 s58, v1, 18 -; FLATSCR-NEXT: v_readlane_b32 s57, v1, 17 -; FLATSCR-NEXT: v_readlane_b32 s56, v1, 16 -; FLATSCR-NEXT: v_readlane_b32 s55, v1, 15 -; FLATSCR-NEXT: v_readlane_b32 s54, v1, 14 -; FLATSCR-NEXT: v_readlane_b32 s53, v1, 13 -; FLATSCR-NEXT: v_readlane_b32 s52, v1, 12 -; FLATSCR-NEXT: v_readlane_b32 s51, v1, 11 -; FLATSCR-NEXT: v_readlane_b32 s50, v1, 10 -; FLATSCR-NEXT: v_readlane_b32 s49, v1, 9 -; FLATSCR-NEXT: v_readlane_b32 s48, v1, 8 -; FLATSCR-NEXT: v_readlane_b32 s47, v1, 7 -; FLATSCR-NEXT: v_readlane_b32 s46, v1, 6 -; FLATSCR-NEXT: v_readlane_b32 s45, v1, 5 -; FLATSCR-NEXT: v_readlane_b32 s44, v1, 4 -; FLATSCR-NEXT: v_readlane_b32 s43, v1, 3 -; FLATSCR-NEXT: v_readlane_b32 s42, v1, 2 -; FLATSCR-NEXT: v_readlane_b32 s41, v1, 1 -; FLATSCR-NEXT: v_readlane_b32 s40, v1, 0 +; FLATSCR-NEXT: v_readlane_b32 s102, v1, 32 +; FLATSCR-NEXT: v_readlane_b32 s101, v1, 31 +; FLATSCR-NEXT: v_readlane_b32 s100, v1, 30 +; FLATSCR-NEXT: v_readlane_b32 s99, v1, 29 +; FLATSCR-NEXT: v_readlane_b32 s98, v1, 28 +; FLATSCR-NEXT: v_readlane_b32 s97, v1, 27 +; FLATSCR-NEXT: v_readlane_b32 s96, v1, 26 +; FLATSCR-NEXT: v_readlane_b32 s95, v1, 25 +; FLATSCR-NEXT: v_readlane_b32 s94, v1, 24 +; FLATSCR-NEXT: v_readlane_b32 s85, v1, 23 +; FLATSCR-NEXT: v_readlane_b32 s84, v1, 22 +; FLATSCR-NEXT: v_readlane_b32 s83, v1, 21 +; FLATSCR-NEXT: v_readlane_b32 s82, v1, 20 +; FLATSCR-NEXT: v_readlane_b32 s81, v1, 19 +; FLATSCR-NEXT: v_readlane_b32 s80, v1, 18 +; FLATSCR-NEXT: v_readlane_b32 s79, v1, 17 +; FLATSCR-NEXT: v_readlane_b32 s78, v1, 16 +; FLATSCR-NEXT: v_readlane_b32 s69, v1, 15 +; FLATSCR-NEXT: v_readlane_b32 s68, v1, 14 +; FLATSCR-NEXT: v_readlane_b32 s67, v1, 13 +; FLATSCR-NEXT: v_readlane_b32 s66, v1, 12 +; FLATSCR-NEXT: v_readlane_b32 s65, v1, 11 +; FLATSCR-NEXT: v_readlane_b32 s64, v1, 10 +; FLATSCR-NEXT: v_readlane_b32 s63, v1, 9 +; FLATSCR-NEXT: v_readlane_b32 s62, v1, 8 +; FLATSCR-NEXT: v_readlane_b32 s53, v1, 7 +; FLATSCR-NEXT: v_readlane_b32 s52, v1, 6 +; FLATSCR-NEXT: v_readlane_b32 s51, v1, 5 +; FLATSCR-NEXT: v_readlane_b32 s50, v1, 4 +; FLATSCR-NEXT: v_readlane_b32 s49, v1, 3 +; FLATSCR-NEXT: v_readlane_b32 s48, v1, 2 +; FLATSCR-NEXT: v_readlane_b32 s47, v1, 1 +; FLATSCR-NEXT: v_readlane_b32 s46, v1, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:8 ; 4-byte Folded Reload @@ -985,145 +743,83 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] -; MUBUF-NEXT: v_writelane_b32 v1, s39, 0 -; MUBUF-NEXT: v_writelane_b32 v1, s40, 1 -; MUBUF-NEXT: v_writelane_b32 v1, s41, 2 -; MUBUF-NEXT: v_writelane_b32 v1, s42, 3 -; MUBUF-NEXT: v_writelane_b32 v1, s43, 4 -; MUBUF-NEXT: v_writelane_b32 v1, s44, 5 -; MUBUF-NEXT: v_writelane_b32 v1, s45, 6 -; MUBUF-NEXT: v_writelane_b32 v1, s46, 7 -; MUBUF-NEXT: v_writelane_b32 v1, s47, 8 -; MUBUF-NEXT: v_writelane_b32 v1, s48, 9 -; MUBUF-NEXT: v_writelane_b32 v1, s49, 10 -; MUBUF-NEXT: v_writelane_b32 v1, s50, 11 -; MUBUF-NEXT: v_writelane_b32 v1, s51, 12 -; MUBUF-NEXT: v_writelane_b32 v1, s52, 13 -; MUBUF-NEXT: v_writelane_b32 v1, s53, 14 -; MUBUF-NEXT: v_writelane_b32 v1, s54, 15 -; MUBUF-NEXT: v_writelane_b32 v1, s55, 16 -; MUBUF-NEXT: v_writelane_b32 v1, s56, 17 -; MUBUF-NEXT: v_writelane_b32 v1, s57, 18 -; MUBUF-NEXT: v_writelane_b32 v1, s58, 19 -; MUBUF-NEXT: v_writelane_b32 v1, s59, 20 -; MUBUF-NEXT: v_writelane_b32 v1, s60, 21 -; MUBUF-NEXT: v_writelane_b32 v1, s61, 22 -; MUBUF-NEXT: v_writelane_b32 v1, s62, 23 -; MUBUF-NEXT: v_writelane_b32 v1, s63, 24 -; MUBUF-NEXT: v_writelane_b32 v1, s64, 25 -; MUBUF-NEXT: v_writelane_b32 v1, s65, 26 -; MUBUF-NEXT: v_writelane_b32 v1, s66, 27 -; MUBUF-NEXT: v_writelane_b32 v1, s67, 28 -; MUBUF-NEXT: v_writelane_b32 v1, s68, 29 -; MUBUF-NEXT: v_writelane_b32 v1, s69, 30 -; MUBUF-NEXT: v_writelane_b32 v1, s70, 31 -; MUBUF-NEXT: v_writelane_b32 v1, s71, 32 -; MUBUF-NEXT: v_writelane_b32 v1, s72, 33 -; MUBUF-NEXT: v_writelane_b32 v1, s73, 34 -; MUBUF-NEXT: v_writelane_b32 v1, s74, 35 -; MUBUF-NEXT: v_writelane_b32 v1, s75, 36 -; MUBUF-NEXT: v_writelane_b32 v1, s76, 37 -; MUBUF-NEXT: v_writelane_b32 v1, s77, 38 -; MUBUF-NEXT: v_writelane_b32 v1, s78, 39 -; MUBUF-NEXT: v_writelane_b32 v1, s79, 40 -; MUBUF-NEXT: v_writelane_b32 v1, s80, 41 -; MUBUF-NEXT: v_writelane_b32 v1, s81, 42 -; MUBUF-NEXT: v_writelane_b32 v1, s82, 43 -; MUBUF-NEXT: v_writelane_b32 v1, s83, 44 -; MUBUF-NEXT: v_writelane_b32 v1, s84, 45 -; MUBUF-NEXT: v_writelane_b32 v1, s85, 46 -; MUBUF-NEXT: v_writelane_b32 v1, s86, 47 -; MUBUF-NEXT: v_writelane_b32 v1, s87, 48 -; MUBUF-NEXT: v_writelane_b32 v1, s88, 49 -; MUBUF-NEXT: v_writelane_b32 v1, s89, 50 -; MUBUF-NEXT: v_writelane_b32 v1, s90, 51 -; MUBUF-NEXT: v_writelane_b32 v1, s91, 52 -; MUBUF-NEXT: v_writelane_b32 v1, s92, 53 -; MUBUF-NEXT: v_writelane_b32 v1, s93, 54 -; MUBUF-NEXT: v_writelane_b32 v1, s94, 55 -; MUBUF-NEXT: v_writelane_b32 v1, s95, 56 -; MUBUF-NEXT: v_writelane_b32 v1, s96, 57 -; MUBUF-NEXT: v_writelane_b32 v1, s97, 58 -; MUBUF-NEXT: v_writelane_b32 v1, s98, 59 -; MUBUF-NEXT: v_writelane_b32 v1, s99, 60 -; MUBUF-NEXT: v_writelane_b32 v1, s100, 61 +; MUBUF-NEXT: v_writelane_b32 v1, s46, 0 +; MUBUF-NEXT: v_writelane_b32 v1, s47, 1 +; MUBUF-NEXT: v_writelane_b32 v1, s48, 2 +; MUBUF-NEXT: v_writelane_b32 v1, s49, 3 +; MUBUF-NEXT: v_writelane_b32 v1, s50, 4 +; MUBUF-NEXT: v_writelane_b32 v1, s51, 5 +; MUBUF-NEXT: v_writelane_b32 v1, s52, 6 +; MUBUF-NEXT: v_writelane_b32 v1, s53, 7 +; MUBUF-NEXT: v_writelane_b32 v1, s62, 8 +; MUBUF-NEXT: v_writelane_b32 v1, s63, 9 +; MUBUF-NEXT: v_writelane_b32 v1, s64, 10 +; MUBUF-NEXT: v_writelane_b32 v1, s65, 11 +; MUBUF-NEXT: v_writelane_b32 v1, s66, 12 +; MUBUF-NEXT: v_writelane_b32 v1, s67, 13 +; MUBUF-NEXT: v_writelane_b32 v1, s68, 14 +; MUBUF-NEXT: v_writelane_b32 v1, s69, 15 +; MUBUF-NEXT: v_writelane_b32 v1, s78, 16 +; MUBUF-NEXT: v_writelane_b32 v1, s79, 17 +; MUBUF-NEXT: v_writelane_b32 v1, s80, 18 +; MUBUF-NEXT: v_writelane_b32 v1, s81, 19 +; MUBUF-NEXT: v_writelane_b32 v1, s82, 20 +; MUBUF-NEXT: v_writelane_b32 v1, s83, 21 +; MUBUF-NEXT: v_writelane_b32 v1, s84, 22 +; MUBUF-NEXT: v_writelane_b32 v1, s85, 23 +; MUBUF-NEXT: v_writelane_b32 v1, s94, 24 +; MUBUF-NEXT: v_writelane_b32 v1, s95, 25 +; MUBUF-NEXT: v_writelane_b32 v1, s96, 26 +; MUBUF-NEXT: v_writelane_b32 v1, s97, 27 +; MUBUF-NEXT: v_writelane_b32 v1, s98, 28 +; MUBUF-NEXT: v_writelane_b32 v1, s99, 29 +; MUBUF-NEXT: v_writelane_b32 v1, s100, 30 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; MUBUF-NEXT: v_writelane_b32 v1, s101, 62 +; MUBUF-NEXT: v_writelane_b32 v1, s101, 31 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber v41 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_writelane_b32 v1, s102, 63 +; MUBUF-NEXT: v_writelane_b32 v1, s102, 32 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_readlane_b32 s102, v1, 63 -; MUBUF-NEXT: v_readlane_b32 s101, v1, 62 -; MUBUF-NEXT: v_readlane_b32 s100, v1, 61 -; MUBUF-NEXT: v_readlane_b32 s99, v1, 60 -; MUBUF-NEXT: v_readlane_b32 s98, v1, 59 -; MUBUF-NEXT: v_readlane_b32 s97, v1, 58 -; MUBUF-NEXT: v_readlane_b32 s96, v1, 57 -; MUBUF-NEXT: v_readlane_b32 s95, v1, 56 -; MUBUF-NEXT: v_readlane_b32 s94, v1, 55 -; MUBUF-NEXT: v_readlane_b32 s93, v1, 54 -; MUBUF-NEXT: v_readlane_b32 s92, v1, 53 -; MUBUF-NEXT: v_readlane_b32 s91, v1, 52 -; MUBUF-NEXT: v_readlane_b32 s90, v1, 51 -; MUBUF-NEXT: v_readlane_b32 s89, v1, 50 -; MUBUF-NEXT: v_readlane_b32 s88, v1, 49 -; MUBUF-NEXT: v_readlane_b32 s87, v1, 48 -; MUBUF-NEXT: v_readlane_b32 s86, v1, 47 -; MUBUF-NEXT: v_readlane_b32 s85, v1, 46 -; MUBUF-NEXT: v_readlane_b32 s84, v1, 45 -; MUBUF-NEXT: v_readlane_b32 s83, v1, 44 -; MUBUF-NEXT: v_readlane_b32 s82, v1, 43 -; MUBUF-NEXT: v_readlane_b32 s81, v1, 42 -; MUBUF-NEXT: v_readlane_b32 s80, v1, 41 -; MUBUF-NEXT: v_readlane_b32 s79, v1, 40 -; MUBUF-NEXT: v_readlane_b32 s78, v1, 39 -; MUBUF-NEXT: v_readlane_b32 s77, v1, 38 -; MUBUF-NEXT: v_readlane_b32 s76, v1, 37 -; MUBUF-NEXT: v_readlane_b32 s75, v1, 36 -; MUBUF-NEXT: v_readlane_b32 s74, v1, 35 -; MUBUF-NEXT: v_readlane_b32 s73, v1, 34 -; MUBUF-NEXT: v_readlane_b32 s72, v1, 33 -; MUBUF-NEXT: v_readlane_b32 s71, v1, 32 -; MUBUF-NEXT: v_readlane_b32 s70, v1, 31 -; MUBUF-NEXT: v_readlane_b32 s69, v1, 30 -; MUBUF-NEXT: v_readlane_b32 s68, v1, 29 -; MUBUF-NEXT: v_readlane_b32 s67, v1, 28 -; MUBUF-NEXT: v_readlane_b32 s66, v1, 27 -; MUBUF-NEXT: v_readlane_b32 s65, v1, 26 -; MUBUF-NEXT: v_readlane_b32 s64, v1, 25 -; MUBUF-NEXT: v_readlane_b32 s63, v1, 24 -; MUBUF-NEXT: v_readlane_b32 s62, v1, 23 -; MUBUF-NEXT: v_readlane_b32 s61, v1, 22 -; MUBUF-NEXT: v_readlane_b32 s60, v1, 21 -; MUBUF-NEXT: v_readlane_b32 s59, v1, 20 -; MUBUF-NEXT: v_readlane_b32 s58, v1, 19 -; MUBUF-NEXT: v_readlane_b32 s57, v1, 18 -; MUBUF-NEXT: v_readlane_b32 s56, v1, 17 -; MUBUF-NEXT: v_readlane_b32 s55, v1, 16 -; MUBUF-NEXT: v_readlane_b32 s54, v1, 15 -; MUBUF-NEXT: v_readlane_b32 s53, v1, 14 -; MUBUF-NEXT: v_readlane_b32 s52, v1, 13 -; MUBUF-NEXT: v_readlane_b32 s51, v1, 12 -; MUBUF-NEXT: v_readlane_b32 s50, v1, 11 -; MUBUF-NEXT: v_readlane_b32 s49, v1, 10 -; MUBUF-NEXT: v_readlane_b32 s48, v1, 9 -; MUBUF-NEXT: v_readlane_b32 s47, v1, 8 -; MUBUF-NEXT: v_readlane_b32 s46, v1, 7 -; MUBUF-NEXT: v_readlane_b32 s45, v1, 6 -; MUBUF-NEXT: v_readlane_b32 s44, v1, 5 -; MUBUF-NEXT: v_readlane_b32 s43, v1, 4 -; MUBUF-NEXT: v_readlane_b32 s42, v1, 3 -; MUBUF-NEXT: v_readlane_b32 s41, v1, 2 -; MUBUF-NEXT: v_readlane_b32 s40, v1, 1 -; MUBUF-NEXT: v_readlane_b32 s39, v1, 0 +; MUBUF-NEXT: v_readlane_b32 s102, v1, 32 +; MUBUF-NEXT: v_readlane_b32 s101, v1, 31 +; MUBUF-NEXT: v_readlane_b32 s100, v1, 30 +; MUBUF-NEXT: v_readlane_b32 s99, v1, 29 +; MUBUF-NEXT: v_readlane_b32 s98, v1, 28 +; MUBUF-NEXT: v_readlane_b32 s97, v1, 27 +; MUBUF-NEXT: v_readlane_b32 s96, v1, 26 +; MUBUF-NEXT: v_readlane_b32 s95, v1, 25 +; MUBUF-NEXT: v_readlane_b32 s94, v1, 24 +; MUBUF-NEXT: v_readlane_b32 s85, v1, 23 +; MUBUF-NEXT: v_readlane_b32 s84, v1, 22 +; MUBUF-NEXT: v_readlane_b32 s83, v1, 21 +; MUBUF-NEXT: v_readlane_b32 s82, v1, 20 +; MUBUF-NEXT: v_readlane_b32 s81, v1, 19 +; MUBUF-NEXT: v_readlane_b32 s80, v1, 18 +; MUBUF-NEXT: v_readlane_b32 s79, v1, 17 +; MUBUF-NEXT: v_readlane_b32 s78, v1, 16 +; MUBUF-NEXT: v_readlane_b32 s69, v1, 15 +; MUBUF-NEXT: v_readlane_b32 s68, v1, 14 +; MUBUF-NEXT: v_readlane_b32 s67, v1, 13 +; MUBUF-NEXT: v_readlane_b32 s66, v1, 12 +; MUBUF-NEXT: v_readlane_b32 s65, v1, 11 +; MUBUF-NEXT: v_readlane_b32 s64, v1, 10 +; MUBUF-NEXT: v_readlane_b32 s63, v1, 9 +; MUBUF-NEXT: v_readlane_b32 s62, v1, 8 +; MUBUF-NEXT: v_readlane_b32 s53, v1, 7 +; MUBUF-NEXT: v_readlane_b32 s52, v1, 6 +; MUBUF-NEXT: v_readlane_b32 s51, v1, 5 +; MUBUF-NEXT: v_readlane_b32 s50, v1, 4 +; MUBUF-NEXT: v_readlane_b32 s49, v1, 3 +; MUBUF-NEXT: v_readlane_b32 s48, v1, 2 +; MUBUF-NEXT: v_readlane_b32 s47, v1, 1 +; MUBUF-NEXT: v_readlane_b32 s46, v1, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload @@ -1140,145 +836,83 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:8 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: v_writelane_b32 v1, s39, 0 -; FLATSCR-NEXT: v_writelane_b32 v1, s40, 1 -; FLATSCR-NEXT: v_writelane_b32 v1, s41, 2 -; FLATSCR-NEXT: v_writelane_b32 v1, s42, 3 -; FLATSCR-NEXT: v_writelane_b32 v1, s43, 4 -; FLATSCR-NEXT: v_writelane_b32 v1, s44, 5 -; FLATSCR-NEXT: v_writelane_b32 v1, s45, 6 -; FLATSCR-NEXT: v_writelane_b32 v1, s46, 7 -; FLATSCR-NEXT: v_writelane_b32 v1, s47, 8 -; FLATSCR-NEXT: v_writelane_b32 v1, s48, 9 -; FLATSCR-NEXT: v_writelane_b32 v1, s49, 10 -; FLATSCR-NEXT: v_writelane_b32 v1, s50, 11 -; FLATSCR-NEXT: v_writelane_b32 v1, s51, 12 -; FLATSCR-NEXT: v_writelane_b32 v1, s52, 13 -; FLATSCR-NEXT: v_writelane_b32 v1, s53, 14 -; FLATSCR-NEXT: v_writelane_b32 v1, s54, 15 -; FLATSCR-NEXT: v_writelane_b32 v1, s55, 16 -; FLATSCR-NEXT: v_writelane_b32 v1, s56, 17 -; FLATSCR-NEXT: v_writelane_b32 v1, s57, 18 -; FLATSCR-NEXT: v_writelane_b32 v1, s58, 19 -; FLATSCR-NEXT: v_writelane_b32 v1, s59, 20 -; FLATSCR-NEXT: v_writelane_b32 v1, s60, 21 -; FLATSCR-NEXT: v_writelane_b32 v1, s61, 22 -; FLATSCR-NEXT: v_writelane_b32 v1, s62, 23 -; FLATSCR-NEXT: v_writelane_b32 v1, s63, 24 -; FLATSCR-NEXT: v_writelane_b32 v1, s64, 25 -; FLATSCR-NEXT: v_writelane_b32 v1, s65, 26 -; FLATSCR-NEXT: v_writelane_b32 v1, s66, 27 -; FLATSCR-NEXT: v_writelane_b32 v1, s67, 28 -; FLATSCR-NEXT: v_writelane_b32 v1, s68, 29 -; FLATSCR-NEXT: v_writelane_b32 v1, s69, 30 -; FLATSCR-NEXT: v_writelane_b32 v1, s70, 31 -; FLATSCR-NEXT: v_writelane_b32 v1, s71, 32 -; FLATSCR-NEXT: v_writelane_b32 v1, s72, 33 -; FLATSCR-NEXT: v_writelane_b32 v1, s73, 34 -; FLATSCR-NEXT: v_writelane_b32 v1, s74, 35 -; FLATSCR-NEXT: v_writelane_b32 v1, s75, 36 -; FLATSCR-NEXT: v_writelane_b32 v1, s76, 37 -; FLATSCR-NEXT: v_writelane_b32 v1, s77, 38 -; FLATSCR-NEXT: v_writelane_b32 v1, s78, 39 -; FLATSCR-NEXT: v_writelane_b32 v1, s79, 40 -; FLATSCR-NEXT: v_writelane_b32 v1, s80, 41 -; FLATSCR-NEXT: v_writelane_b32 v1, s81, 42 -; FLATSCR-NEXT: v_writelane_b32 v1, s82, 43 -; FLATSCR-NEXT: v_writelane_b32 v1, s83, 44 -; FLATSCR-NEXT: v_writelane_b32 v1, s84, 45 -; FLATSCR-NEXT: v_writelane_b32 v1, s85, 46 -; FLATSCR-NEXT: v_writelane_b32 v1, s86, 47 -; FLATSCR-NEXT: v_writelane_b32 v1, s87, 48 -; FLATSCR-NEXT: v_writelane_b32 v1, s88, 49 -; FLATSCR-NEXT: v_writelane_b32 v1, s89, 50 -; FLATSCR-NEXT: v_writelane_b32 v1, s90, 51 -; FLATSCR-NEXT: v_writelane_b32 v1, s91, 52 -; FLATSCR-NEXT: v_writelane_b32 v1, s92, 53 -; FLATSCR-NEXT: v_writelane_b32 v1, s93, 54 -; FLATSCR-NEXT: v_writelane_b32 v1, s94, 55 -; FLATSCR-NEXT: v_writelane_b32 v1, s95, 56 -; FLATSCR-NEXT: v_writelane_b32 v1, s96, 57 -; FLATSCR-NEXT: v_writelane_b32 v1, s97, 58 -; FLATSCR-NEXT: v_writelane_b32 v1, s98, 59 -; FLATSCR-NEXT: v_writelane_b32 v1, s99, 60 -; FLATSCR-NEXT: v_writelane_b32 v1, s100, 61 +; FLATSCR-NEXT: v_writelane_b32 v1, s46, 0 +; FLATSCR-NEXT: v_writelane_b32 v1, s47, 1 +; FLATSCR-NEXT: v_writelane_b32 v1, s48, 2 +; FLATSCR-NEXT: v_writelane_b32 v1, s49, 3 +; FLATSCR-NEXT: v_writelane_b32 v1, s50, 4 +; FLATSCR-NEXT: v_writelane_b32 v1, s51, 5 +; FLATSCR-NEXT: v_writelane_b32 v1, s52, 6 +; FLATSCR-NEXT: v_writelane_b32 v1, s53, 7 +; FLATSCR-NEXT: v_writelane_b32 v1, s62, 8 +; FLATSCR-NEXT: v_writelane_b32 v1, s63, 9 +; FLATSCR-NEXT: v_writelane_b32 v1, s64, 10 +; FLATSCR-NEXT: v_writelane_b32 v1, s65, 11 +; FLATSCR-NEXT: v_writelane_b32 v1, s66, 12 +; FLATSCR-NEXT: v_writelane_b32 v1, s67, 13 +; FLATSCR-NEXT: v_writelane_b32 v1, s68, 14 +; FLATSCR-NEXT: v_writelane_b32 v1, s69, 15 +; FLATSCR-NEXT: v_writelane_b32 v1, s78, 16 +; FLATSCR-NEXT: v_writelane_b32 v1, s79, 17 +; FLATSCR-NEXT: v_writelane_b32 v1, s80, 18 +; FLATSCR-NEXT: v_writelane_b32 v1, s81, 19 +; FLATSCR-NEXT: v_writelane_b32 v1, s82, 20 +; FLATSCR-NEXT: v_writelane_b32 v1, s83, 21 +; FLATSCR-NEXT: v_writelane_b32 v1, s84, 22 +; FLATSCR-NEXT: v_writelane_b32 v1, s85, 23 +; FLATSCR-NEXT: v_writelane_b32 v1, s94, 24 +; FLATSCR-NEXT: v_writelane_b32 v1, s95, 25 +; FLATSCR-NEXT: v_writelane_b32 v1, s96, 26 +; FLATSCR-NEXT: v_writelane_b32 v1, s97, 27 +; FLATSCR-NEXT: v_writelane_b32 v1, s98, 28 +; FLATSCR-NEXT: v_writelane_b32 v1, s99, 29 +; FLATSCR-NEXT: v_writelane_b32 v1, s100, 30 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill -; FLATSCR-NEXT: v_writelane_b32 v1, s101, 62 +; FLATSCR-NEXT: v_writelane_b32 v1, s101, 31 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber v41 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_writelane_b32 v1, s102, 63 +; FLATSCR-NEXT: v_writelane_b32 v1, s102, 32 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 -; FLATSCR-NEXT: v_readlane_b32 s102, v1, 63 -; FLATSCR-NEXT: v_readlane_b32 s101, v1, 62 -; FLATSCR-NEXT: v_readlane_b32 s100, v1, 61 -; FLATSCR-NEXT: v_readlane_b32 s99, v1, 60 -; FLATSCR-NEXT: v_readlane_b32 s98, v1, 59 -; FLATSCR-NEXT: v_readlane_b32 s97, v1, 58 -; FLATSCR-NEXT: v_readlane_b32 s96, v1, 57 -; FLATSCR-NEXT: v_readlane_b32 s95, v1, 56 -; FLATSCR-NEXT: v_readlane_b32 s94, v1, 55 -; FLATSCR-NEXT: v_readlane_b32 s93, v1, 54 -; FLATSCR-NEXT: v_readlane_b32 s92, v1, 53 -; FLATSCR-NEXT: v_readlane_b32 s91, v1, 52 -; FLATSCR-NEXT: v_readlane_b32 s90, v1, 51 -; FLATSCR-NEXT: v_readlane_b32 s89, v1, 50 -; FLATSCR-NEXT: v_readlane_b32 s88, v1, 49 -; FLATSCR-NEXT: v_readlane_b32 s87, v1, 48 -; FLATSCR-NEXT: v_readlane_b32 s86, v1, 47 -; FLATSCR-NEXT: v_readlane_b32 s85, v1, 46 -; FLATSCR-NEXT: v_readlane_b32 s84, v1, 45 -; FLATSCR-NEXT: v_readlane_b32 s83, v1, 44 -; FLATSCR-NEXT: v_readlane_b32 s82, v1, 43 -; FLATSCR-NEXT: v_readlane_b32 s81, v1, 42 -; FLATSCR-NEXT: v_readlane_b32 s80, v1, 41 -; FLATSCR-NEXT: v_readlane_b32 s79, v1, 40 -; FLATSCR-NEXT: v_readlane_b32 s78, v1, 39 -; FLATSCR-NEXT: v_readlane_b32 s77, v1, 38 -; FLATSCR-NEXT: v_readlane_b32 s76, v1, 37 -; FLATSCR-NEXT: v_readlane_b32 s75, v1, 36 -; FLATSCR-NEXT: v_readlane_b32 s74, v1, 35 -; FLATSCR-NEXT: v_readlane_b32 s73, v1, 34 -; FLATSCR-NEXT: v_readlane_b32 s72, v1, 33 -; FLATSCR-NEXT: v_readlane_b32 s71, v1, 32 -; FLATSCR-NEXT: v_readlane_b32 s70, v1, 31 -; FLATSCR-NEXT: v_readlane_b32 s69, v1, 30 -; FLATSCR-NEXT: v_readlane_b32 s68, v1, 29 -; FLATSCR-NEXT: v_readlane_b32 s67, v1, 28 -; FLATSCR-NEXT: v_readlane_b32 s66, v1, 27 -; FLATSCR-NEXT: v_readlane_b32 s65, v1, 26 -; FLATSCR-NEXT: v_readlane_b32 s64, v1, 25 -; FLATSCR-NEXT: v_readlane_b32 s63, v1, 24 -; FLATSCR-NEXT: v_readlane_b32 s62, v1, 23 -; FLATSCR-NEXT: v_readlane_b32 s61, v1, 22 -; FLATSCR-NEXT: v_readlane_b32 s60, v1, 21 -; FLATSCR-NEXT: v_readlane_b32 s59, v1, 20 -; FLATSCR-NEXT: v_readlane_b32 s58, v1, 19 -; FLATSCR-NEXT: v_readlane_b32 s57, v1, 18 -; FLATSCR-NEXT: v_readlane_b32 s56, v1, 17 -; FLATSCR-NEXT: v_readlane_b32 s55, v1, 16 -; FLATSCR-NEXT: v_readlane_b32 s54, v1, 15 -; FLATSCR-NEXT: v_readlane_b32 s53, v1, 14 -; FLATSCR-NEXT: v_readlane_b32 s52, v1, 13 -; FLATSCR-NEXT: v_readlane_b32 s51, v1, 12 -; FLATSCR-NEXT: v_readlane_b32 s50, v1, 11 -; FLATSCR-NEXT: v_readlane_b32 s49, v1, 10 -; FLATSCR-NEXT: v_readlane_b32 s48, v1, 9 -; FLATSCR-NEXT: v_readlane_b32 s47, v1, 8 -; FLATSCR-NEXT: v_readlane_b32 s46, v1, 7 -; FLATSCR-NEXT: v_readlane_b32 s45, v1, 6 -; FLATSCR-NEXT: v_readlane_b32 s44, v1, 5 -; FLATSCR-NEXT: v_readlane_b32 s43, v1, 4 -; FLATSCR-NEXT: v_readlane_b32 s42, v1, 3 -; FLATSCR-NEXT: v_readlane_b32 s41, v1, 2 -; FLATSCR-NEXT: v_readlane_b32 s40, v1, 1 -; FLATSCR-NEXT: v_readlane_b32 s39, v1, 0 +; FLATSCR-NEXT: v_readlane_b32 s102, v1, 32 +; FLATSCR-NEXT: v_readlane_b32 s101, v1, 31 +; FLATSCR-NEXT: v_readlane_b32 s100, v1, 30 +; FLATSCR-NEXT: v_readlane_b32 s99, v1, 29 +; FLATSCR-NEXT: v_readlane_b32 s98, v1, 28 +; FLATSCR-NEXT: v_readlane_b32 s97, v1, 27 +; FLATSCR-NEXT: v_readlane_b32 s96, v1, 26 +; FLATSCR-NEXT: v_readlane_b32 s95, v1, 25 +; FLATSCR-NEXT: v_readlane_b32 s94, v1, 24 +; FLATSCR-NEXT: v_readlane_b32 s85, v1, 23 +; FLATSCR-NEXT: v_readlane_b32 s84, v1, 22 +; FLATSCR-NEXT: v_readlane_b32 s83, v1, 21 +; FLATSCR-NEXT: v_readlane_b32 s82, v1, 20 +; FLATSCR-NEXT: v_readlane_b32 s81, v1, 19 +; FLATSCR-NEXT: v_readlane_b32 s80, v1, 18 +; FLATSCR-NEXT: v_readlane_b32 s79, v1, 17 +; FLATSCR-NEXT: v_readlane_b32 s78, v1, 16 +; FLATSCR-NEXT: v_readlane_b32 s69, v1, 15 +; FLATSCR-NEXT: v_readlane_b32 s68, v1, 14 +; FLATSCR-NEXT: v_readlane_b32 s67, v1, 13 +; FLATSCR-NEXT: v_readlane_b32 s66, v1, 12 +; FLATSCR-NEXT: v_readlane_b32 s65, v1, 11 +; FLATSCR-NEXT: v_readlane_b32 s64, v1, 10 +; FLATSCR-NEXT: v_readlane_b32 s63, v1, 9 +; FLATSCR-NEXT: v_readlane_b32 s62, v1, 8 +; FLATSCR-NEXT: v_readlane_b32 s53, v1, 7 +; FLATSCR-NEXT: v_readlane_b32 s52, v1, 6 +; FLATSCR-NEXT: v_readlane_b32 s51, v1, 5 +; FLATSCR-NEXT: v_readlane_b32 s50, v1, 4 +; FLATSCR-NEXT: v_readlane_b32 s49, v1, 3 +; FLATSCR-NEXT: v_readlane_b32 s48, v1, 2 +; FLATSCR-NEXT: v_readlane_b32 s47, v1, 1 +; FLATSCR-NEXT: v_readlane_b32 s46, v1, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:8 ; 4-byte Folded Reload @@ -1346,7 +980,7 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; MUBUF-LABEL: no_unused_non_csr_sgpr_for_fp: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 vcc_lo, s33 +; MUBUF-NEXT: s_mov_b32 s38, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 ; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill @@ -1365,14 +999,14 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: s_mov_b32 s33, vcc_lo +; MUBUF-NEXT: s_mov_b32 s33, s38 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; ; FLATSCR-LABEL: no_unused_non_csr_sgpr_for_fp: ; FLATSCR: ; %bb.0: ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: s_mov_b32 vcc_lo, s33 +; FLATSCR-NEXT: s_mov_b32 s38, s33 ; FLATSCR-NEXT: s_mov_b32 s33, s32 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:4 ; 4-byte Folded Spill @@ -1391,7 +1025,7 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:4 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: s_mov_b32 s33, vcc_lo +; FLATSCR-NEXT: s_mov_b32 s33, s38 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, addrspace(5) @@ -1412,7 +1046,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; MUBUF-LABEL: no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 vcc_lo, s33 +; MUBUF-NEXT: s_mov_b32 s38, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 ; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill @@ -1434,14 +1068,14 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: s_mov_b32 s33, vcc_lo +; MUBUF-NEXT: s_mov_b32 s33, s38 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; ; FLATSCR-LABEL: no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr: ; FLATSCR: ; %bb.0: ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: s_mov_b32 vcc_lo, s33 +; FLATSCR-NEXT: s_mov_b32 s38, s33 ; FLATSCR-NEXT: s_mov_b32 s33, s32 ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill @@ -1463,7 +1097,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v40, off, s33 offset:4 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: s_mov_b32 s33, vcc_lo +; FLATSCR-NEXT: s_mov_b32 s33, s38 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, addrspace(5) @@ -1491,7 +1125,7 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) ; MUBUF-LABEL: scratch_reg_needed_mubuf_offset: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 vcc_lo, s33 +; MUBUF-NEXT: s_mov_b32 s38, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 ; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100 @@ -1517,14 +1151,14 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) ; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100 ; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s6 ; 4-byte Folded Reload ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: s_mov_b32 s33, vcc_lo +; MUBUF-NEXT: s_mov_b32 s33, s38 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; ; FLATSCR-LABEL: scratch_reg_needed_mubuf_offset: ; FLATSCR: ; %bb.0: ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: s_mov_b32 vcc_lo, s33 +; FLATSCR-NEXT: s_mov_b32 s38, s33 ; FLATSCR-NEXT: s_mov_b32 s33, s32 ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: s_add_i32 s2, s33, 0x1004 @@ -1550,7 +1184,7 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) ; FLATSCR-NEXT: s_add_i32 s2, s33, 0x1004 ; FLATSCR-NEXT: scratch_load_dword v40, off, s2 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: s_mov_b32 s33, vcc_lo +; FLATSCR-NEXT: s_mov_b32 s33, s38 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, addrspace(5) @@ -1650,22 +1284,15 @@ define void @callee_need_to_spill_fp_to_memory() #3 { ; MUBUF-LABEL: callee_need_to_spill_fp_to_memory: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 s4, s33 +; MUBUF-NEXT: s_mov_b32 s38, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 -; MUBUF-NEXT: v_mov_b32_e32 v0, s4 -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; 4-byte Folded Spill ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved SGPRs ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber all VGPRs ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: s_addk_i32 s32, 0x200 -; MUBUF-NEXT: s_mov_b32 s32, s33 -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_readfirstlane_b32 s4, v0 -; MUBUF-NEXT: s_mov_b32 s33, s4 +; MUBUF-NEXT: s_mov_b32 s33, s38 ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; ; FLATSCR-LABEL: callee_need_to_spill_fp_to_memory: @@ -1702,156 +1329,89 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; MUBUF-LABEL: callee_need_to_spill_fp_to_memory_full_reserved_vgpr: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 s4, s33 +; MUBUF-NEXT: s_mov_b32 s38, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 -; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s33 ; 4-byte Folded Spill -; MUBUF-NEXT: s_mov_b64 exec, s[6:7] -; MUBUF-NEXT: v_writelane_b32 v39, s39, 0 -; MUBUF-NEXT: v_writelane_b32 v39, s40, 1 -; MUBUF-NEXT: v_writelane_b32 v39, s41, 2 -; MUBUF-NEXT: v_writelane_b32 v39, s42, 3 -; MUBUF-NEXT: v_writelane_b32 v39, s43, 4 -; MUBUF-NEXT: v_writelane_b32 v39, s44, 5 -; MUBUF-NEXT: v_writelane_b32 v39, s45, 6 -; MUBUF-NEXT: v_writelane_b32 v39, s46, 7 -; MUBUF-NEXT: v_writelane_b32 v39, s47, 8 -; MUBUF-NEXT: v_writelane_b32 v39, s48, 9 -; MUBUF-NEXT: v_writelane_b32 v39, s49, 10 -; MUBUF-NEXT: v_writelane_b32 v39, s50, 11 -; MUBUF-NEXT: v_writelane_b32 v39, s51, 12 -; MUBUF-NEXT: v_writelane_b32 v39, s52, 13 -; MUBUF-NEXT: v_writelane_b32 v39, s53, 14 -; MUBUF-NEXT: v_writelane_b32 v39, s54, 15 -; MUBUF-NEXT: v_writelane_b32 v39, s55, 16 -; MUBUF-NEXT: v_writelane_b32 v39, s56, 17 -; MUBUF-NEXT: v_writelane_b32 v39, s57, 18 -; MUBUF-NEXT: v_writelane_b32 v39, s58, 19 -; MUBUF-NEXT: v_writelane_b32 v39, s59, 20 -; MUBUF-NEXT: v_writelane_b32 v39, s60, 21 -; MUBUF-NEXT: v_writelane_b32 v39, s61, 22 -; MUBUF-NEXT: v_writelane_b32 v39, s62, 23 -; MUBUF-NEXT: v_writelane_b32 v39, s63, 24 -; MUBUF-NEXT: v_writelane_b32 v39, s64, 25 -; MUBUF-NEXT: v_writelane_b32 v39, s65, 26 -; MUBUF-NEXT: v_writelane_b32 v39, s66, 27 -; MUBUF-NEXT: v_writelane_b32 v39, s67, 28 -; MUBUF-NEXT: v_writelane_b32 v39, s68, 29 -; MUBUF-NEXT: v_writelane_b32 v39, s69, 30 -; MUBUF-NEXT: v_writelane_b32 v39, s70, 31 -; MUBUF-NEXT: v_writelane_b32 v39, s71, 32 -; MUBUF-NEXT: v_writelane_b32 v39, s72, 33 -; MUBUF-NEXT: v_writelane_b32 v39, s73, 34 -; MUBUF-NEXT: v_writelane_b32 v39, s74, 35 -; MUBUF-NEXT: v_writelane_b32 v39, s75, 36 -; MUBUF-NEXT: v_writelane_b32 v39, s76, 37 -; MUBUF-NEXT: v_writelane_b32 v39, s77, 38 -; MUBUF-NEXT: v_writelane_b32 v39, s78, 39 -; MUBUF-NEXT: v_writelane_b32 v39, s79, 40 -; MUBUF-NEXT: v_writelane_b32 v39, s80, 41 -; MUBUF-NEXT: v_writelane_b32 v39, s81, 42 -; MUBUF-NEXT: v_writelane_b32 v39, s82, 43 -; MUBUF-NEXT: v_writelane_b32 v39, s83, 44 -; MUBUF-NEXT: v_writelane_b32 v39, s84, 45 -; MUBUF-NEXT: v_writelane_b32 v39, s85, 46 -; MUBUF-NEXT: v_writelane_b32 v39, s86, 47 -; MUBUF-NEXT: v_writelane_b32 v39, s87, 48 -; MUBUF-NEXT: v_writelane_b32 v39, s88, 49 -; MUBUF-NEXT: v_writelane_b32 v39, s89, 50 -; MUBUF-NEXT: v_writelane_b32 v39, s90, 51 -; MUBUF-NEXT: v_writelane_b32 v39, s91, 52 -; MUBUF-NEXT: v_writelane_b32 v39, s92, 53 -; MUBUF-NEXT: v_writelane_b32 v39, s93, 54 -; MUBUF-NEXT: v_writelane_b32 v39, s94, 55 -; MUBUF-NEXT: v_writelane_b32 v39, s95, 56 -; MUBUF-NEXT: v_writelane_b32 v39, s96, 57 -; MUBUF-NEXT: v_writelane_b32 v39, s97, 58 -; MUBUF-NEXT: v_writelane_b32 v39, s98, 59 -; MUBUF-NEXT: v_writelane_b32 v39, s99, 60 -; MUBUF-NEXT: v_writelane_b32 v39, s100, 61 -; MUBUF-NEXT: v_mov_b32_e32 v0, s4 -; MUBUF-NEXT: v_writelane_b32 v39, s101, 62 -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; MUBUF-NEXT: v_writelane_b32 v39, s102, 63 +; MUBUF-NEXT: s_mov_b64 exec, s[4:5] +; MUBUF-NEXT: v_writelane_b32 v39, s46, 0 +; MUBUF-NEXT: v_writelane_b32 v39, s47, 1 +; MUBUF-NEXT: v_writelane_b32 v39, s48, 2 +; MUBUF-NEXT: v_writelane_b32 v39, s49, 3 +; MUBUF-NEXT: v_writelane_b32 v39, s50, 4 +; MUBUF-NEXT: v_writelane_b32 v39, s51, 5 +; MUBUF-NEXT: v_writelane_b32 v39, s52, 6 +; MUBUF-NEXT: v_writelane_b32 v39, s53, 7 +; MUBUF-NEXT: v_writelane_b32 v39, s62, 8 +; MUBUF-NEXT: v_writelane_b32 v39, s63, 9 +; MUBUF-NEXT: v_writelane_b32 v39, s64, 10 +; MUBUF-NEXT: v_writelane_b32 v39, s65, 11 +; MUBUF-NEXT: v_writelane_b32 v39, s66, 12 +; MUBUF-NEXT: v_writelane_b32 v39, s67, 13 +; MUBUF-NEXT: v_writelane_b32 v39, s68, 14 +; MUBUF-NEXT: v_writelane_b32 v39, s69, 15 +; MUBUF-NEXT: v_writelane_b32 v39, s78, 16 +; MUBUF-NEXT: v_writelane_b32 v39, s79, 17 +; MUBUF-NEXT: v_writelane_b32 v39, s80, 18 +; MUBUF-NEXT: v_writelane_b32 v39, s81, 19 +; MUBUF-NEXT: v_writelane_b32 v39, s82, 20 +; MUBUF-NEXT: v_writelane_b32 v39, s83, 21 +; MUBUF-NEXT: v_writelane_b32 v39, s84, 22 +; MUBUF-NEXT: v_writelane_b32 v39, s85, 23 +; MUBUF-NEXT: v_writelane_b32 v39, s94, 24 +; MUBUF-NEXT: v_writelane_b32 v39, s95, 25 +; MUBUF-NEXT: v_writelane_b32 v39, s96, 26 +; MUBUF-NEXT: v_writelane_b32 v39, s97, 27 +; MUBUF-NEXT: v_writelane_b32 v39, s98, 28 +; MUBUF-NEXT: v_writelane_b32 v39, s99, 29 +; MUBUF-NEXT: v_writelane_b32 v39, s100, 30 +; MUBUF-NEXT: v_writelane_b32 v39, s101, 31 +; MUBUF-NEXT: s_addk_i32 s32, 0x200 +; MUBUF-NEXT: v_writelane_b32 v39, s102, 32 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber all VGPRs except CSR v40 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; MUBUF-NEXT: s_addk_i32 s32, 0x300 -; MUBUF-NEXT: v_readlane_b32 s102, v39, 63 -; MUBUF-NEXT: v_readlane_b32 s101, v39, 62 -; MUBUF-NEXT: v_readlane_b32 s100, v39, 61 -; MUBUF-NEXT: v_readlane_b32 s99, v39, 60 -; MUBUF-NEXT: v_readlane_b32 s98, v39, 59 -; MUBUF-NEXT: v_readlane_b32 s97, v39, 58 -; MUBUF-NEXT: v_readlane_b32 s96, v39, 57 -; MUBUF-NEXT: v_readlane_b32 s95, v39, 56 -; MUBUF-NEXT: v_readlane_b32 s94, v39, 55 -; MUBUF-NEXT: v_readlane_b32 s93, v39, 54 -; MUBUF-NEXT: v_readlane_b32 s92, v39, 53 -; MUBUF-NEXT: v_readlane_b32 s91, v39, 52 -; MUBUF-NEXT: v_readlane_b32 s90, v39, 51 -; MUBUF-NEXT: v_readlane_b32 s89, v39, 50 -; MUBUF-NEXT: v_readlane_b32 s88, v39, 49 -; MUBUF-NEXT: v_readlane_b32 s87, v39, 48 -; MUBUF-NEXT: v_readlane_b32 s86, v39, 47 -; MUBUF-NEXT: v_readlane_b32 s85, v39, 46 -; MUBUF-NEXT: v_readlane_b32 s84, v39, 45 -; MUBUF-NEXT: v_readlane_b32 s83, v39, 44 -; MUBUF-NEXT: v_readlane_b32 s82, v39, 43 -; MUBUF-NEXT: v_readlane_b32 s81, v39, 42 -; MUBUF-NEXT: v_readlane_b32 s80, v39, 41 -; MUBUF-NEXT: v_readlane_b32 s79, v39, 40 -; MUBUF-NEXT: v_readlane_b32 s78, v39, 39 -; MUBUF-NEXT: v_readlane_b32 s77, v39, 38 -; MUBUF-NEXT: v_readlane_b32 s76, v39, 37 -; MUBUF-NEXT: v_readlane_b32 s75, v39, 36 -; MUBUF-NEXT: v_readlane_b32 s74, v39, 35 -; MUBUF-NEXT: v_readlane_b32 s73, v39, 34 -; MUBUF-NEXT: v_readlane_b32 s72, v39, 33 -; MUBUF-NEXT: v_readlane_b32 s71, v39, 32 -; MUBUF-NEXT: v_readlane_b32 s70, v39, 31 -; MUBUF-NEXT: v_readlane_b32 s69, v39, 30 -; MUBUF-NEXT: v_readlane_b32 s68, v39, 29 -; MUBUF-NEXT: v_readlane_b32 s67, v39, 28 -; MUBUF-NEXT: v_readlane_b32 s66, v39, 27 -; MUBUF-NEXT: v_readlane_b32 s65, v39, 26 -; MUBUF-NEXT: v_readlane_b32 s64, v39, 25 -; MUBUF-NEXT: v_readlane_b32 s63, v39, 24 -; MUBUF-NEXT: v_readlane_b32 s62, v39, 23 -; MUBUF-NEXT: v_readlane_b32 s61, v39, 22 -; MUBUF-NEXT: v_readlane_b32 s60, v39, 21 -; MUBUF-NEXT: v_readlane_b32 s59, v39, 20 -; MUBUF-NEXT: v_readlane_b32 s58, v39, 19 -; MUBUF-NEXT: v_readlane_b32 s57, v39, 18 -; MUBUF-NEXT: v_readlane_b32 s56, v39, 17 -; MUBUF-NEXT: v_readlane_b32 s55, v39, 16 -; MUBUF-NEXT: v_readlane_b32 s54, v39, 15 -; MUBUF-NEXT: v_readlane_b32 s53, v39, 14 -; MUBUF-NEXT: v_readlane_b32 s52, v39, 13 -; MUBUF-NEXT: v_readlane_b32 s51, v39, 12 -; MUBUF-NEXT: v_readlane_b32 s50, v39, 11 -; MUBUF-NEXT: v_readlane_b32 s49, v39, 10 -; MUBUF-NEXT: v_readlane_b32 s48, v39, 9 -; MUBUF-NEXT: v_readlane_b32 s47, v39, 8 -; MUBUF-NEXT: v_readlane_b32 s46, v39, 7 -; MUBUF-NEXT: v_readlane_b32 s45, v39, 6 -; MUBUF-NEXT: v_readlane_b32 s44, v39, 5 -; MUBUF-NEXT: v_readlane_b32 s43, v39, 4 -; MUBUF-NEXT: v_readlane_b32 s42, v39, 3 -; MUBUF-NEXT: v_readlane_b32 s41, v39, 2 -; MUBUF-NEXT: v_readlane_b32 s40, v39, 1 -; MUBUF-NEXT: v_readlane_b32 s39, v39, 0 +; MUBUF-NEXT: v_readlane_b32 s102, v39, 32 +; MUBUF-NEXT: v_readlane_b32 s101, v39, 31 +; MUBUF-NEXT: v_readlane_b32 s100, v39, 30 +; MUBUF-NEXT: v_readlane_b32 s99, v39, 29 +; MUBUF-NEXT: v_readlane_b32 s98, v39, 28 +; MUBUF-NEXT: v_readlane_b32 s97, v39, 27 +; MUBUF-NEXT: v_readlane_b32 s96, v39, 26 +; MUBUF-NEXT: v_readlane_b32 s95, v39, 25 +; MUBUF-NEXT: v_readlane_b32 s94, v39, 24 +; MUBUF-NEXT: v_readlane_b32 s85, v39, 23 +; MUBUF-NEXT: v_readlane_b32 s84, v39, 22 +; MUBUF-NEXT: v_readlane_b32 s83, v39, 21 +; MUBUF-NEXT: v_readlane_b32 s82, v39, 20 +; MUBUF-NEXT: v_readlane_b32 s81, v39, 19 +; MUBUF-NEXT: v_readlane_b32 s80, v39, 18 +; MUBUF-NEXT: v_readlane_b32 s79, v39, 17 +; MUBUF-NEXT: v_readlane_b32 s78, v39, 16 +; MUBUF-NEXT: v_readlane_b32 s69, v39, 15 +; MUBUF-NEXT: v_readlane_b32 s68, v39, 14 +; MUBUF-NEXT: v_readlane_b32 s67, v39, 13 +; MUBUF-NEXT: v_readlane_b32 s66, v39, 12 +; MUBUF-NEXT: v_readlane_b32 s65, v39, 11 +; MUBUF-NEXT: v_readlane_b32 s64, v39, 10 +; MUBUF-NEXT: v_readlane_b32 s63, v39, 9 +; MUBUF-NEXT: v_readlane_b32 s62, v39, 8 +; MUBUF-NEXT: v_readlane_b32 s53, v39, 7 +; MUBUF-NEXT: v_readlane_b32 s52, v39, 6 +; MUBUF-NEXT: v_readlane_b32 s51, v39, 5 +; MUBUF-NEXT: v_readlane_b32 s50, v39, 4 +; MUBUF-NEXT: v_readlane_b32 s49, v39, 3 +; MUBUF-NEXT: v_readlane_b32 s48, v39, 2 +; MUBUF-NEXT: v_readlane_b32 s47, v39, 1 +; MUBUF-NEXT: v_readlane_b32 s46, v39, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_readfirstlane_b32 s4, v0 -; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v39, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: s_mov_b64 exec, s[6:7] -; MUBUF-NEXT: s_mov_b32 s33, s4 +; MUBUF-NEXT: s_mov_b64 exec, s[4:5] +; MUBUF-NEXT: s_mov_b32 s33, s38 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; @@ -1863,141 +1423,79 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v39, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: v_writelane_b32 v39, s39, 0 -; FLATSCR-NEXT: v_writelane_b32 v39, s40, 1 -; FLATSCR-NEXT: v_writelane_b32 v39, s41, 2 -; FLATSCR-NEXT: v_writelane_b32 v39, s42, 3 -; FLATSCR-NEXT: v_writelane_b32 v39, s43, 4 -; FLATSCR-NEXT: v_writelane_b32 v39, s44, 5 -; FLATSCR-NEXT: v_writelane_b32 v39, s45, 6 -; FLATSCR-NEXT: v_writelane_b32 v39, s46, 7 -; FLATSCR-NEXT: v_writelane_b32 v39, s47, 8 -; FLATSCR-NEXT: v_writelane_b32 v39, s48, 9 -; FLATSCR-NEXT: v_writelane_b32 v39, s49, 10 -; FLATSCR-NEXT: v_writelane_b32 v39, s50, 11 -; FLATSCR-NEXT: v_writelane_b32 v39, s51, 12 -; FLATSCR-NEXT: v_writelane_b32 v39, s52, 13 -; FLATSCR-NEXT: v_writelane_b32 v39, s53, 14 -; FLATSCR-NEXT: v_writelane_b32 v39, s54, 15 -; FLATSCR-NEXT: v_writelane_b32 v39, s55, 16 -; FLATSCR-NEXT: v_writelane_b32 v39, s56, 17 -; FLATSCR-NEXT: v_writelane_b32 v39, s57, 18 -; FLATSCR-NEXT: v_writelane_b32 v39, s58, 19 -; FLATSCR-NEXT: v_writelane_b32 v39, s59, 20 -; FLATSCR-NEXT: v_writelane_b32 v39, s60, 21 -; FLATSCR-NEXT: v_writelane_b32 v39, s61, 22 -; FLATSCR-NEXT: v_writelane_b32 v39, s62, 23 -; FLATSCR-NEXT: v_writelane_b32 v39, s63, 24 -; FLATSCR-NEXT: v_writelane_b32 v39, s64, 25 -; FLATSCR-NEXT: v_writelane_b32 v39, s65, 26 -; FLATSCR-NEXT: v_writelane_b32 v39, s66, 27 -; FLATSCR-NEXT: v_writelane_b32 v39, s67, 28 -; FLATSCR-NEXT: v_writelane_b32 v39, s68, 29 -; FLATSCR-NEXT: v_writelane_b32 v39, s69, 30 -; FLATSCR-NEXT: v_writelane_b32 v39, s70, 31 -; FLATSCR-NEXT: v_writelane_b32 v39, s71, 32 -; FLATSCR-NEXT: v_writelane_b32 v39, s72, 33 -; FLATSCR-NEXT: v_writelane_b32 v39, s73, 34 -; FLATSCR-NEXT: v_writelane_b32 v39, s74, 35 -; FLATSCR-NEXT: v_writelane_b32 v39, s75, 36 -; FLATSCR-NEXT: v_writelane_b32 v39, s76, 37 -; FLATSCR-NEXT: v_writelane_b32 v39, s77, 38 -; FLATSCR-NEXT: v_writelane_b32 v39, s78, 39 -; FLATSCR-NEXT: v_writelane_b32 v39, s79, 40 -; FLATSCR-NEXT: v_writelane_b32 v39, s80, 41 -; FLATSCR-NEXT: v_writelane_b32 v39, s81, 42 -; FLATSCR-NEXT: v_writelane_b32 v39, s82, 43 -; FLATSCR-NEXT: v_writelane_b32 v39, s83, 44 -; FLATSCR-NEXT: v_writelane_b32 v39, s84, 45 -; FLATSCR-NEXT: v_writelane_b32 v39, s85, 46 -; FLATSCR-NEXT: v_writelane_b32 v39, s86, 47 -; FLATSCR-NEXT: v_writelane_b32 v39, s87, 48 -; FLATSCR-NEXT: v_writelane_b32 v39, s88, 49 -; FLATSCR-NEXT: v_writelane_b32 v39, s89, 50 -; FLATSCR-NEXT: v_writelane_b32 v39, s90, 51 -; FLATSCR-NEXT: v_writelane_b32 v39, s91, 52 -; FLATSCR-NEXT: v_writelane_b32 v39, s92, 53 -; FLATSCR-NEXT: v_writelane_b32 v39, s93, 54 -; FLATSCR-NEXT: v_writelane_b32 v39, s94, 55 -; FLATSCR-NEXT: v_writelane_b32 v39, s95, 56 -; FLATSCR-NEXT: v_writelane_b32 v39, s96, 57 -; FLATSCR-NEXT: v_writelane_b32 v39, s97, 58 -; FLATSCR-NEXT: v_writelane_b32 v39, s98, 59 -; FLATSCR-NEXT: v_writelane_b32 v39, s99, 60 -; FLATSCR-NEXT: v_writelane_b32 v39, s100, 61 -; FLATSCR-NEXT: v_writelane_b32 v39, s101, 62 +; FLATSCR-NEXT: v_writelane_b32 v39, s46, 0 +; FLATSCR-NEXT: v_writelane_b32 v39, s47, 1 +; FLATSCR-NEXT: v_writelane_b32 v39, s48, 2 +; FLATSCR-NEXT: v_writelane_b32 v39, s49, 3 +; FLATSCR-NEXT: v_writelane_b32 v39, s50, 4 +; FLATSCR-NEXT: v_writelane_b32 v39, s51, 5 +; FLATSCR-NEXT: v_writelane_b32 v39, s52, 6 +; FLATSCR-NEXT: v_writelane_b32 v39, s53, 7 +; FLATSCR-NEXT: v_writelane_b32 v39, s62, 8 +; FLATSCR-NEXT: v_writelane_b32 v39, s63, 9 +; FLATSCR-NEXT: v_writelane_b32 v39, s64, 10 +; FLATSCR-NEXT: v_writelane_b32 v39, s65, 11 +; FLATSCR-NEXT: v_writelane_b32 v39, s66, 12 +; FLATSCR-NEXT: v_writelane_b32 v39, s67, 13 +; FLATSCR-NEXT: v_writelane_b32 v39, s68, 14 +; FLATSCR-NEXT: v_writelane_b32 v39, s69, 15 +; FLATSCR-NEXT: v_writelane_b32 v39, s78, 16 +; FLATSCR-NEXT: v_writelane_b32 v39, s79, 17 +; FLATSCR-NEXT: v_writelane_b32 v39, s80, 18 +; FLATSCR-NEXT: v_writelane_b32 v39, s81, 19 +; FLATSCR-NEXT: v_writelane_b32 v39, s82, 20 +; FLATSCR-NEXT: v_writelane_b32 v39, s83, 21 +; FLATSCR-NEXT: v_writelane_b32 v39, s84, 22 +; FLATSCR-NEXT: v_writelane_b32 v39, s85, 23 +; FLATSCR-NEXT: v_writelane_b32 v39, s94, 24 +; FLATSCR-NEXT: v_writelane_b32 v39, s95, 25 +; FLATSCR-NEXT: v_writelane_b32 v39, s96, 26 +; FLATSCR-NEXT: v_writelane_b32 v39, s97, 27 +; FLATSCR-NEXT: v_writelane_b32 v39, s98, 28 +; FLATSCR-NEXT: v_writelane_b32 v39, s99, 29 +; FLATSCR-NEXT: v_writelane_b32 v39, s100, 30 +; FLATSCR-NEXT: v_writelane_b32 v39, s101, 31 ; FLATSCR-NEXT: s_add_i32 s32, s32, 8 -; FLATSCR-NEXT: v_writelane_b32 v39, s102, 63 +; FLATSCR-NEXT: v_writelane_b32 v39, s102, 32 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber all VGPRs except CSR v40 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s102, v39, 63 -; FLATSCR-NEXT: v_readlane_b32 s101, v39, 62 -; FLATSCR-NEXT: v_readlane_b32 s100, v39, 61 -; FLATSCR-NEXT: v_readlane_b32 s99, v39, 60 -; FLATSCR-NEXT: v_readlane_b32 s98, v39, 59 -; FLATSCR-NEXT: v_readlane_b32 s97, v39, 58 -; FLATSCR-NEXT: v_readlane_b32 s96, v39, 57 -; FLATSCR-NEXT: v_readlane_b32 s95, v39, 56 -; FLATSCR-NEXT: v_readlane_b32 s94, v39, 55 -; FLATSCR-NEXT: v_readlane_b32 s93, v39, 54 -; FLATSCR-NEXT: v_readlane_b32 s92, v39, 53 -; FLATSCR-NEXT: v_readlane_b32 s91, v39, 52 -; FLATSCR-NEXT: v_readlane_b32 s90, v39, 51 -; FLATSCR-NEXT: v_readlane_b32 s89, v39, 50 -; FLATSCR-NEXT: v_readlane_b32 s88, v39, 49 -; FLATSCR-NEXT: v_readlane_b32 s87, v39, 48 -; FLATSCR-NEXT: v_readlane_b32 s86, v39, 47 -; FLATSCR-NEXT: v_readlane_b32 s85, v39, 46 -; FLATSCR-NEXT: v_readlane_b32 s84, v39, 45 -; FLATSCR-NEXT: v_readlane_b32 s83, v39, 44 -; FLATSCR-NEXT: v_readlane_b32 s82, v39, 43 -; FLATSCR-NEXT: v_readlane_b32 s81, v39, 42 -; FLATSCR-NEXT: v_readlane_b32 s80, v39, 41 -; FLATSCR-NEXT: v_readlane_b32 s79, v39, 40 -; FLATSCR-NEXT: v_readlane_b32 s78, v39, 39 -; FLATSCR-NEXT: v_readlane_b32 s77, v39, 38 -; FLATSCR-NEXT: v_readlane_b32 s76, v39, 37 -; FLATSCR-NEXT: v_readlane_b32 s75, v39, 36 -; FLATSCR-NEXT: v_readlane_b32 s74, v39, 35 -; FLATSCR-NEXT: v_readlane_b32 s73, v39, 34 -; FLATSCR-NEXT: v_readlane_b32 s72, v39, 33 -; FLATSCR-NEXT: v_readlane_b32 s71, v39, 32 -; FLATSCR-NEXT: v_readlane_b32 s70, v39, 31 -; FLATSCR-NEXT: v_readlane_b32 s69, v39, 30 -; FLATSCR-NEXT: v_readlane_b32 s68, v39, 29 -; FLATSCR-NEXT: v_readlane_b32 s67, v39, 28 -; FLATSCR-NEXT: v_readlane_b32 s66, v39, 27 -; FLATSCR-NEXT: v_readlane_b32 s65, v39, 26 -; FLATSCR-NEXT: v_readlane_b32 s64, v39, 25 -; FLATSCR-NEXT: v_readlane_b32 s63, v39, 24 -; FLATSCR-NEXT: v_readlane_b32 s62, v39, 23 -; FLATSCR-NEXT: v_readlane_b32 s61, v39, 22 -; FLATSCR-NEXT: v_readlane_b32 s60, v39, 21 -; FLATSCR-NEXT: v_readlane_b32 s59, v39, 20 -; FLATSCR-NEXT: v_readlane_b32 s58, v39, 19 -; FLATSCR-NEXT: v_readlane_b32 s57, v39, 18 -; FLATSCR-NEXT: v_readlane_b32 s56, v39, 17 -; FLATSCR-NEXT: v_readlane_b32 s55, v39, 16 -; FLATSCR-NEXT: v_readlane_b32 s54, v39, 15 -; FLATSCR-NEXT: v_readlane_b32 s53, v39, 14 -; FLATSCR-NEXT: v_readlane_b32 s52, v39, 13 -; FLATSCR-NEXT: v_readlane_b32 s51, v39, 12 -; FLATSCR-NEXT: v_readlane_b32 s50, v39, 11 -; FLATSCR-NEXT: v_readlane_b32 s49, v39, 10 -; FLATSCR-NEXT: v_readlane_b32 s48, v39, 9 -; FLATSCR-NEXT: v_readlane_b32 s47, v39, 8 -; FLATSCR-NEXT: v_readlane_b32 s46, v39, 7 -; FLATSCR-NEXT: v_readlane_b32 s45, v39, 6 -; FLATSCR-NEXT: v_readlane_b32 s44, v39, 5 -; FLATSCR-NEXT: v_readlane_b32 s43, v39, 4 -; FLATSCR-NEXT: v_readlane_b32 s42, v39, 3 -; FLATSCR-NEXT: v_readlane_b32 s41, v39, 2 -; FLATSCR-NEXT: v_readlane_b32 s40, v39, 1 -; FLATSCR-NEXT: v_readlane_b32 s39, v39, 0 +; FLATSCR-NEXT: v_readlane_b32 s102, v39, 32 +; FLATSCR-NEXT: v_readlane_b32 s101, v39, 31 +; FLATSCR-NEXT: v_readlane_b32 s100, v39, 30 +; FLATSCR-NEXT: v_readlane_b32 s99, v39, 29 +; FLATSCR-NEXT: v_readlane_b32 s98, v39, 28 +; FLATSCR-NEXT: v_readlane_b32 s97, v39, 27 +; FLATSCR-NEXT: v_readlane_b32 s96, v39, 26 +; FLATSCR-NEXT: v_readlane_b32 s95, v39, 25 +; FLATSCR-NEXT: v_readlane_b32 s94, v39, 24 +; FLATSCR-NEXT: v_readlane_b32 s85, v39, 23 +; FLATSCR-NEXT: v_readlane_b32 s84, v39, 22 +; FLATSCR-NEXT: v_readlane_b32 s83, v39, 21 +; FLATSCR-NEXT: v_readlane_b32 s82, v39, 20 +; FLATSCR-NEXT: v_readlane_b32 s81, v39, 19 +; FLATSCR-NEXT: v_readlane_b32 s80, v39, 18 +; FLATSCR-NEXT: v_readlane_b32 s79, v39, 17 +; FLATSCR-NEXT: v_readlane_b32 s78, v39, 16 +; FLATSCR-NEXT: v_readlane_b32 s69, v39, 15 +; FLATSCR-NEXT: v_readlane_b32 s68, v39, 14 +; FLATSCR-NEXT: v_readlane_b32 s67, v39, 13 +; FLATSCR-NEXT: v_readlane_b32 s66, v39, 12 +; FLATSCR-NEXT: v_readlane_b32 s65, v39, 11 +; FLATSCR-NEXT: v_readlane_b32 s64, v39, 10 +; FLATSCR-NEXT: v_readlane_b32 s63, v39, 9 +; FLATSCR-NEXT: v_readlane_b32 s62, v39, 8 +; FLATSCR-NEXT: v_readlane_b32 s53, v39, 7 +; FLATSCR-NEXT: v_readlane_b32 s52, v39, 6 +; FLATSCR-NEXT: v_readlane_b32 s51, v39, 5 +; FLATSCR-NEXT: v_readlane_b32 s50, v39, 4 +; FLATSCR-NEXT: v_readlane_b32 s49, v39, 3 +; FLATSCR-NEXT: v_readlane_b32 s48, v39, 2 +; FLATSCR-NEXT: v_readlane_b32 s47, v39, 1 +; FLATSCR-NEXT: v_readlane_b32 s46, v39, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_load_dword v39, off, s33 ; 4-byte Folded Reload @@ -2033,155 +1531,89 @@ define void @callee_need_to_spill_fp_to_reg() #1 { ; MUBUF-LABEL: callee_need_to_spill_fp_to_reg: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 s4, s33 +; MUBUF-NEXT: s_mov_b32 s38, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 -; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 +; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; MUBUF-NEXT: s_mov_b64 exec, s[6:7] -; MUBUF-NEXT: v_writelane_b32 v40, s39, 0 -; MUBUF-NEXT: v_writelane_b32 v40, s40, 1 -; MUBUF-NEXT: v_writelane_b32 v40, s41, 2 -; MUBUF-NEXT: v_writelane_b32 v40, s42, 3 -; MUBUF-NEXT: v_writelane_b32 v40, s43, 4 -; MUBUF-NEXT: v_writelane_b32 v40, s44, 5 -; MUBUF-NEXT: v_writelane_b32 v40, s45, 6 -; MUBUF-NEXT: v_writelane_b32 v40, s46, 7 -; MUBUF-NEXT: v_writelane_b32 v40, s47, 8 -; MUBUF-NEXT: v_writelane_b32 v40, s48, 9 -; MUBUF-NEXT: v_writelane_b32 v40, s49, 10 -; MUBUF-NEXT: v_writelane_b32 v40, s50, 11 -; MUBUF-NEXT: v_writelane_b32 v40, s51, 12 -; MUBUF-NEXT: v_writelane_b32 v40, s52, 13 -; MUBUF-NEXT: v_writelane_b32 v40, s53, 14 -; MUBUF-NEXT: v_writelane_b32 v40, s54, 15 -; MUBUF-NEXT: v_writelane_b32 v40, s55, 16 -; MUBUF-NEXT: v_writelane_b32 v40, s56, 17 -; MUBUF-NEXT: v_writelane_b32 v40, s57, 18 -; MUBUF-NEXT: v_writelane_b32 v40, s58, 19 -; MUBUF-NEXT: v_writelane_b32 v40, s59, 20 -; MUBUF-NEXT: v_writelane_b32 v40, s60, 21 -; MUBUF-NEXT: v_writelane_b32 v40, s61, 22 -; MUBUF-NEXT: v_writelane_b32 v40, s62, 23 -; MUBUF-NEXT: v_writelane_b32 v40, s63, 24 -; MUBUF-NEXT: v_writelane_b32 v40, s64, 25 -; MUBUF-NEXT: v_writelane_b32 v40, s65, 26 -; MUBUF-NEXT: v_writelane_b32 v40, s66, 27 -; MUBUF-NEXT: v_writelane_b32 v40, s67, 28 -; MUBUF-NEXT: v_writelane_b32 v40, s68, 29 -; MUBUF-NEXT: v_writelane_b32 v40, s69, 30 -; MUBUF-NEXT: v_writelane_b32 v40, s70, 31 -; MUBUF-NEXT: v_writelane_b32 v40, s71, 32 -; MUBUF-NEXT: v_writelane_b32 v40, s72, 33 -; MUBUF-NEXT: v_writelane_b32 v40, s73, 34 -; MUBUF-NEXT: v_writelane_b32 v40, s74, 35 -; MUBUF-NEXT: v_writelane_b32 v40, s75, 36 -; MUBUF-NEXT: v_writelane_b32 v40, s76, 37 -; MUBUF-NEXT: v_writelane_b32 v40, s77, 38 -; MUBUF-NEXT: v_writelane_b32 v40, s78, 39 -; MUBUF-NEXT: v_writelane_b32 v40, s79, 40 -; MUBUF-NEXT: v_writelane_b32 v40, s80, 41 -; MUBUF-NEXT: v_writelane_b32 v40, s81, 42 -; MUBUF-NEXT: v_writelane_b32 v40, s82, 43 -; MUBUF-NEXT: v_writelane_b32 v40, s83, 44 -; MUBUF-NEXT: v_writelane_b32 v40, s84, 45 -; MUBUF-NEXT: v_writelane_b32 v40, s85, 46 -; MUBUF-NEXT: v_writelane_b32 v40, s86, 47 -; MUBUF-NEXT: v_writelane_b32 v40, s87, 48 -; MUBUF-NEXT: v_writelane_b32 v40, s88, 49 -; MUBUF-NEXT: v_writelane_b32 v40, s89, 50 -; MUBUF-NEXT: v_writelane_b32 v40, s90, 51 -; MUBUF-NEXT: v_writelane_b32 v40, s91, 52 -; MUBUF-NEXT: v_writelane_b32 v40, s92, 53 -; MUBUF-NEXT: v_writelane_b32 v40, s93, 54 -; MUBUF-NEXT: v_writelane_b32 v40, s94, 55 -; MUBUF-NEXT: v_writelane_b32 v40, s95, 56 -; MUBUF-NEXT: v_writelane_b32 v40, s96, 57 -; MUBUF-NEXT: v_writelane_b32 v40, s97, 58 -; MUBUF-NEXT: v_writelane_b32 v40, s98, 59 -; MUBUF-NEXT: v_writelane_b32 v40, s99, 60 -; MUBUF-NEXT: v_writelane_b32 v40, s100, 61 -; MUBUF-NEXT: v_writelane_b32 v40, s101, 62 -; MUBUF-NEXT: v_writelane_b32 v41, s4, 0 -; MUBUF-NEXT: s_addk_i32 s32, 0x300 -; MUBUF-NEXT: v_writelane_b32 v40, s102, 63 +; MUBUF-NEXT: s_mov_b64 exec, s[4:5] +; MUBUF-NEXT: v_writelane_b32 v40, s46, 0 +; MUBUF-NEXT: v_writelane_b32 v40, s47, 1 +; MUBUF-NEXT: v_writelane_b32 v40, s48, 2 +; MUBUF-NEXT: v_writelane_b32 v40, s49, 3 +; MUBUF-NEXT: v_writelane_b32 v40, s50, 4 +; MUBUF-NEXT: v_writelane_b32 v40, s51, 5 +; MUBUF-NEXT: v_writelane_b32 v40, s52, 6 +; MUBUF-NEXT: v_writelane_b32 v40, s53, 7 +; MUBUF-NEXT: v_writelane_b32 v40, s62, 8 +; MUBUF-NEXT: v_writelane_b32 v40, s63, 9 +; MUBUF-NEXT: v_writelane_b32 v40, s64, 10 +; MUBUF-NEXT: v_writelane_b32 v40, s65, 11 +; MUBUF-NEXT: v_writelane_b32 v40, s66, 12 +; MUBUF-NEXT: v_writelane_b32 v40, s67, 13 +; MUBUF-NEXT: v_writelane_b32 v40, s68, 14 +; MUBUF-NEXT: v_writelane_b32 v40, s69, 15 +; MUBUF-NEXT: v_writelane_b32 v40, s78, 16 +; MUBUF-NEXT: v_writelane_b32 v40, s79, 17 +; MUBUF-NEXT: v_writelane_b32 v40, s80, 18 +; MUBUF-NEXT: v_writelane_b32 v40, s81, 19 +; MUBUF-NEXT: v_writelane_b32 v40, s82, 20 +; MUBUF-NEXT: v_writelane_b32 v40, s83, 21 +; MUBUF-NEXT: v_writelane_b32 v40, s84, 22 +; MUBUF-NEXT: v_writelane_b32 v40, s85, 23 +; MUBUF-NEXT: v_writelane_b32 v40, s94, 24 +; MUBUF-NEXT: v_writelane_b32 v40, s95, 25 +; MUBUF-NEXT: v_writelane_b32 v40, s96, 26 +; MUBUF-NEXT: v_writelane_b32 v40, s97, 27 +; MUBUF-NEXT: v_writelane_b32 v40, s98, 28 +; MUBUF-NEXT: v_writelane_b32 v40, s99, 29 +; MUBUF-NEXT: v_writelane_b32 v40, s100, 30 +; MUBUF-NEXT: v_writelane_b32 v40, s101, 31 +; MUBUF-NEXT: s_addk_i32 s32, 0x200 +; MUBUF-NEXT: v_writelane_b32 v40, s102, 32 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber all VGPRs except CSR v40 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s102, v40, 63 -; MUBUF-NEXT: v_readlane_b32 s101, v40, 62 -; MUBUF-NEXT: v_readlane_b32 s100, v40, 61 -; MUBUF-NEXT: v_readlane_b32 s99, v40, 60 -; MUBUF-NEXT: v_readlane_b32 s98, v40, 59 -; MUBUF-NEXT: v_readlane_b32 s97, v40, 58 -; MUBUF-NEXT: v_readlane_b32 s96, v40, 57 -; MUBUF-NEXT: v_readlane_b32 s95, v40, 56 -; MUBUF-NEXT: v_readlane_b32 s94, v40, 55 -; MUBUF-NEXT: v_readlane_b32 s93, v40, 54 -; MUBUF-NEXT: v_readlane_b32 s92, v40, 53 -; MUBUF-NEXT: v_readlane_b32 s91, v40, 52 -; MUBUF-NEXT: v_readlane_b32 s90, v40, 51 -; MUBUF-NEXT: v_readlane_b32 s89, v40, 50 -; MUBUF-NEXT: v_readlane_b32 s88, v40, 49 -; MUBUF-NEXT: v_readlane_b32 s87, v40, 48 -; MUBUF-NEXT: v_readlane_b32 s86, v40, 47 -; MUBUF-NEXT: v_readlane_b32 s85, v40, 46 -; MUBUF-NEXT: v_readlane_b32 s84, v40, 45 -; MUBUF-NEXT: v_readlane_b32 s83, v40, 44 -; MUBUF-NEXT: v_readlane_b32 s82, v40, 43 -; MUBUF-NEXT: v_readlane_b32 s81, v40, 42 -; MUBUF-NEXT: v_readlane_b32 s80, v40, 41 -; MUBUF-NEXT: v_readlane_b32 s79, v40, 40 -; MUBUF-NEXT: v_readlane_b32 s78, v40, 39 -; MUBUF-NEXT: v_readlane_b32 s77, v40, 38 -; MUBUF-NEXT: v_readlane_b32 s76, v40, 37 -; MUBUF-NEXT: v_readlane_b32 s75, v40, 36 -; MUBUF-NEXT: v_readlane_b32 s74, v40, 35 -; MUBUF-NEXT: v_readlane_b32 s73, v40, 34 -; MUBUF-NEXT: v_readlane_b32 s72, v40, 33 -; MUBUF-NEXT: v_readlane_b32 s71, v40, 32 -; MUBUF-NEXT: v_readlane_b32 s70, v40, 31 -; MUBUF-NEXT: v_readlane_b32 s69, v40, 30 -; MUBUF-NEXT: v_readlane_b32 s68, v40, 29 -; MUBUF-NEXT: v_readlane_b32 s67, v40, 28 -; MUBUF-NEXT: v_readlane_b32 s66, v40, 27 -; MUBUF-NEXT: v_readlane_b32 s65, v40, 26 -; MUBUF-NEXT: v_readlane_b32 s64, v40, 25 -; MUBUF-NEXT: v_readlane_b32 s63, v40, 24 -; MUBUF-NEXT: v_readlane_b32 s62, v40, 23 -; MUBUF-NEXT: v_readlane_b32 s61, v40, 22 -; MUBUF-NEXT: v_readlane_b32 s60, v40, 21 -; MUBUF-NEXT: v_readlane_b32 s59, v40, 20 -; MUBUF-NEXT: v_readlane_b32 s58, v40, 19 -; MUBUF-NEXT: v_readlane_b32 s57, v40, 18 -; MUBUF-NEXT: v_readlane_b32 s56, v40, 17 -; MUBUF-NEXT: v_readlane_b32 s55, v40, 16 -; MUBUF-NEXT: v_readlane_b32 s54, v40, 15 -; MUBUF-NEXT: v_readlane_b32 s53, v40, 14 -; MUBUF-NEXT: v_readlane_b32 s52, v40, 13 -; MUBUF-NEXT: v_readlane_b32 s51, v40, 12 -; MUBUF-NEXT: v_readlane_b32 s50, v40, 11 -; MUBUF-NEXT: v_readlane_b32 s49, v40, 10 -; MUBUF-NEXT: v_readlane_b32 s48, v40, 9 -; MUBUF-NEXT: v_readlane_b32 s47, v40, 8 -; MUBUF-NEXT: v_readlane_b32 s46, v40, 7 -; MUBUF-NEXT: v_readlane_b32 s45, v40, 6 -; MUBUF-NEXT: v_readlane_b32 s44, v40, 5 -; MUBUF-NEXT: v_readlane_b32 s43, v40, 4 -; MUBUF-NEXT: v_readlane_b32 s42, v40, 3 -; MUBUF-NEXT: v_readlane_b32 s41, v40, 2 -; MUBUF-NEXT: v_readlane_b32 s40, v40, 1 -; MUBUF-NEXT: v_readlane_b32 s39, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s102, v40, 32 +; MUBUF-NEXT: v_readlane_b32 s101, v40, 31 +; MUBUF-NEXT: v_readlane_b32 s100, v40, 30 +; MUBUF-NEXT: v_readlane_b32 s99, v40, 29 +; MUBUF-NEXT: v_readlane_b32 s98, v40, 28 +; MUBUF-NEXT: v_readlane_b32 s97, v40, 27 +; MUBUF-NEXT: v_readlane_b32 s96, v40, 26 +; MUBUF-NEXT: v_readlane_b32 s95, v40, 25 +; MUBUF-NEXT: v_readlane_b32 s94, v40, 24 +; MUBUF-NEXT: v_readlane_b32 s85, v40, 23 +; MUBUF-NEXT: v_readlane_b32 s84, v40, 22 +; MUBUF-NEXT: v_readlane_b32 s83, v40, 21 +; MUBUF-NEXT: v_readlane_b32 s82, v40, 20 +; MUBUF-NEXT: v_readlane_b32 s81, v40, 19 +; MUBUF-NEXT: v_readlane_b32 s80, v40, 18 +; MUBUF-NEXT: v_readlane_b32 s79, v40, 17 +; MUBUF-NEXT: v_readlane_b32 s78, v40, 16 +; MUBUF-NEXT: v_readlane_b32 s69, v40, 15 +; MUBUF-NEXT: v_readlane_b32 s68, v40, 14 +; MUBUF-NEXT: v_readlane_b32 s67, v40, 13 +; MUBUF-NEXT: v_readlane_b32 s66, v40, 12 +; MUBUF-NEXT: v_readlane_b32 s65, v40, 11 +; MUBUF-NEXT: v_readlane_b32 s64, v40, 10 +; MUBUF-NEXT: v_readlane_b32 s63, v40, 9 +; MUBUF-NEXT: v_readlane_b32 s62, v40, 8 +; MUBUF-NEXT: v_readlane_b32 s53, v40, 7 +; MUBUF-NEXT: v_readlane_b32 s52, v40, 6 +; MUBUF-NEXT: v_readlane_b32 s51, v40, 5 +; MUBUF-NEXT: v_readlane_b32 s50, v40, 4 +; MUBUF-NEXT: v_readlane_b32 s49, v40, 3 +; MUBUF-NEXT: v_readlane_b32 s48, v40, 2 +; MUBUF-NEXT: v_readlane_b32 s47, v40, 1 +; MUBUF-NEXT: v_readlane_b32 s46, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 -; MUBUF-NEXT: v_readlane_b32 s4, v41, 0 -; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 +; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; MUBUF-NEXT: s_mov_b64 exec, s[6:7] -; MUBUF-NEXT: s_mov_b32 s33, s4 +; MUBUF-NEXT: s_mov_b64 exec, s[4:5] +; MUBUF-NEXT: s_mov_b32 s33, s38 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; @@ -2193,141 +1625,79 @@ define void @callee_need_to_spill_fp_to_reg() #1 { ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: v_writelane_b32 v40, s39, 0 -; FLATSCR-NEXT: v_writelane_b32 v40, s40, 1 -; FLATSCR-NEXT: v_writelane_b32 v40, s41, 2 -; FLATSCR-NEXT: v_writelane_b32 v40, s42, 3 -; FLATSCR-NEXT: v_writelane_b32 v40, s43, 4 -; FLATSCR-NEXT: v_writelane_b32 v40, s44, 5 -; FLATSCR-NEXT: v_writelane_b32 v40, s45, 6 -; FLATSCR-NEXT: v_writelane_b32 v40, s46, 7 -; FLATSCR-NEXT: v_writelane_b32 v40, s47, 8 -; FLATSCR-NEXT: v_writelane_b32 v40, s48, 9 -; FLATSCR-NEXT: v_writelane_b32 v40, s49, 10 -; FLATSCR-NEXT: v_writelane_b32 v40, s50, 11 -; FLATSCR-NEXT: v_writelane_b32 v40, s51, 12 -; FLATSCR-NEXT: v_writelane_b32 v40, s52, 13 -; FLATSCR-NEXT: v_writelane_b32 v40, s53, 14 -; FLATSCR-NEXT: v_writelane_b32 v40, s54, 15 -; FLATSCR-NEXT: v_writelane_b32 v40, s55, 16 -; FLATSCR-NEXT: v_writelane_b32 v40, s56, 17 -; FLATSCR-NEXT: v_writelane_b32 v40, s57, 18 -; FLATSCR-NEXT: v_writelane_b32 v40, s58, 19 -; FLATSCR-NEXT: v_writelane_b32 v40, s59, 20 -; FLATSCR-NEXT: v_writelane_b32 v40, s60, 21 -; FLATSCR-NEXT: v_writelane_b32 v40, s61, 22 -; FLATSCR-NEXT: v_writelane_b32 v40, s62, 23 -; FLATSCR-NEXT: v_writelane_b32 v40, s63, 24 -; FLATSCR-NEXT: v_writelane_b32 v40, s64, 25 -; FLATSCR-NEXT: v_writelane_b32 v40, s65, 26 -; FLATSCR-NEXT: v_writelane_b32 v40, s66, 27 -; FLATSCR-NEXT: v_writelane_b32 v40, s67, 28 -; FLATSCR-NEXT: v_writelane_b32 v40, s68, 29 -; FLATSCR-NEXT: v_writelane_b32 v40, s69, 30 -; FLATSCR-NEXT: v_writelane_b32 v40, s70, 31 -; FLATSCR-NEXT: v_writelane_b32 v40, s71, 32 -; FLATSCR-NEXT: v_writelane_b32 v40, s72, 33 -; FLATSCR-NEXT: v_writelane_b32 v40, s73, 34 -; FLATSCR-NEXT: v_writelane_b32 v40, s74, 35 -; FLATSCR-NEXT: v_writelane_b32 v40, s75, 36 -; FLATSCR-NEXT: v_writelane_b32 v40, s76, 37 -; FLATSCR-NEXT: v_writelane_b32 v40, s77, 38 -; FLATSCR-NEXT: v_writelane_b32 v40, s78, 39 -; FLATSCR-NEXT: v_writelane_b32 v40, s79, 40 -; FLATSCR-NEXT: v_writelane_b32 v40, s80, 41 -; FLATSCR-NEXT: v_writelane_b32 v40, s81, 42 -; FLATSCR-NEXT: v_writelane_b32 v40, s82, 43 -; FLATSCR-NEXT: v_writelane_b32 v40, s83, 44 -; FLATSCR-NEXT: v_writelane_b32 v40, s84, 45 -; FLATSCR-NEXT: v_writelane_b32 v40, s85, 46 -; FLATSCR-NEXT: v_writelane_b32 v40, s86, 47 -; FLATSCR-NEXT: v_writelane_b32 v40, s87, 48 -; FLATSCR-NEXT: v_writelane_b32 v40, s88, 49 -; FLATSCR-NEXT: v_writelane_b32 v40, s89, 50 -; FLATSCR-NEXT: v_writelane_b32 v40, s90, 51 -; FLATSCR-NEXT: v_writelane_b32 v40, s91, 52 -; FLATSCR-NEXT: v_writelane_b32 v40, s92, 53 -; FLATSCR-NEXT: v_writelane_b32 v40, s93, 54 -; FLATSCR-NEXT: v_writelane_b32 v40, s94, 55 -; FLATSCR-NEXT: v_writelane_b32 v40, s95, 56 -; FLATSCR-NEXT: v_writelane_b32 v40, s96, 57 -; FLATSCR-NEXT: v_writelane_b32 v40, s97, 58 -; FLATSCR-NEXT: v_writelane_b32 v40, s98, 59 -; FLATSCR-NEXT: v_writelane_b32 v40, s99, 60 -; FLATSCR-NEXT: v_writelane_b32 v40, s100, 61 -; FLATSCR-NEXT: v_writelane_b32 v40, s101, 62 +; FLATSCR-NEXT: v_writelane_b32 v40, s46, 0 +; FLATSCR-NEXT: v_writelane_b32 v40, s47, 1 +; FLATSCR-NEXT: v_writelane_b32 v40, s48, 2 +; FLATSCR-NEXT: v_writelane_b32 v40, s49, 3 +; FLATSCR-NEXT: v_writelane_b32 v40, s50, 4 +; FLATSCR-NEXT: v_writelane_b32 v40, s51, 5 +; FLATSCR-NEXT: v_writelane_b32 v40, s52, 6 +; FLATSCR-NEXT: v_writelane_b32 v40, s53, 7 +; FLATSCR-NEXT: v_writelane_b32 v40, s62, 8 +; FLATSCR-NEXT: v_writelane_b32 v40, s63, 9 +; FLATSCR-NEXT: v_writelane_b32 v40, s64, 10 +; FLATSCR-NEXT: v_writelane_b32 v40, s65, 11 +; FLATSCR-NEXT: v_writelane_b32 v40, s66, 12 +; FLATSCR-NEXT: v_writelane_b32 v40, s67, 13 +; FLATSCR-NEXT: v_writelane_b32 v40, s68, 14 +; FLATSCR-NEXT: v_writelane_b32 v40, s69, 15 +; FLATSCR-NEXT: v_writelane_b32 v40, s78, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s79, 17 +; FLATSCR-NEXT: v_writelane_b32 v40, s80, 18 +; FLATSCR-NEXT: v_writelane_b32 v40, s81, 19 +; FLATSCR-NEXT: v_writelane_b32 v40, s82, 20 +; FLATSCR-NEXT: v_writelane_b32 v40, s83, 21 +; FLATSCR-NEXT: v_writelane_b32 v40, s84, 22 +; FLATSCR-NEXT: v_writelane_b32 v40, s85, 23 +; FLATSCR-NEXT: v_writelane_b32 v40, s94, 24 +; FLATSCR-NEXT: v_writelane_b32 v40, s95, 25 +; FLATSCR-NEXT: v_writelane_b32 v40, s96, 26 +; FLATSCR-NEXT: v_writelane_b32 v40, s97, 27 +; FLATSCR-NEXT: v_writelane_b32 v40, s98, 28 +; FLATSCR-NEXT: v_writelane_b32 v40, s99, 29 +; FLATSCR-NEXT: v_writelane_b32 v40, s100, 30 +; FLATSCR-NEXT: v_writelane_b32 v40, s101, 31 ; FLATSCR-NEXT: s_add_i32 s32, s32, 8 -; FLATSCR-NEXT: v_writelane_b32 v40, s102, 63 +; FLATSCR-NEXT: v_writelane_b32 v40, s102, 32 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber all VGPRs except CSR v40 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s102, v40, 63 -; FLATSCR-NEXT: v_readlane_b32 s101, v40, 62 -; FLATSCR-NEXT: v_readlane_b32 s100, v40, 61 -; FLATSCR-NEXT: v_readlane_b32 s99, v40, 60 -; FLATSCR-NEXT: v_readlane_b32 s98, v40, 59 -; FLATSCR-NEXT: v_readlane_b32 s97, v40, 58 -; FLATSCR-NEXT: v_readlane_b32 s96, v40, 57 -; FLATSCR-NEXT: v_readlane_b32 s95, v40, 56 -; FLATSCR-NEXT: v_readlane_b32 s94, v40, 55 -; FLATSCR-NEXT: v_readlane_b32 s93, v40, 54 -; FLATSCR-NEXT: v_readlane_b32 s92, v40, 53 -; FLATSCR-NEXT: v_readlane_b32 s91, v40, 52 -; FLATSCR-NEXT: v_readlane_b32 s90, v40, 51 -; FLATSCR-NEXT: v_readlane_b32 s89, v40, 50 -; FLATSCR-NEXT: v_readlane_b32 s88, v40, 49 -; FLATSCR-NEXT: v_readlane_b32 s87, v40, 48 -; FLATSCR-NEXT: v_readlane_b32 s86, v40, 47 -; FLATSCR-NEXT: v_readlane_b32 s85, v40, 46 -; FLATSCR-NEXT: v_readlane_b32 s84, v40, 45 -; FLATSCR-NEXT: v_readlane_b32 s83, v40, 44 -; FLATSCR-NEXT: v_readlane_b32 s82, v40, 43 -; FLATSCR-NEXT: v_readlane_b32 s81, v40, 42 -; FLATSCR-NEXT: v_readlane_b32 s80, v40, 41 -; FLATSCR-NEXT: v_readlane_b32 s79, v40, 40 -; FLATSCR-NEXT: v_readlane_b32 s78, v40, 39 -; FLATSCR-NEXT: v_readlane_b32 s77, v40, 38 -; FLATSCR-NEXT: v_readlane_b32 s76, v40, 37 -; FLATSCR-NEXT: v_readlane_b32 s75, v40, 36 -; FLATSCR-NEXT: v_readlane_b32 s74, v40, 35 -; FLATSCR-NEXT: v_readlane_b32 s73, v40, 34 -; FLATSCR-NEXT: v_readlane_b32 s72, v40, 33 -; FLATSCR-NEXT: v_readlane_b32 s71, v40, 32 -; FLATSCR-NEXT: v_readlane_b32 s70, v40, 31 -; FLATSCR-NEXT: v_readlane_b32 s69, v40, 30 -; FLATSCR-NEXT: v_readlane_b32 s68, v40, 29 -; FLATSCR-NEXT: v_readlane_b32 s67, v40, 28 -; FLATSCR-NEXT: v_readlane_b32 s66, v40, 27 -; FLATSCR-NEXT: v_readlane_b32 s65, v40, 26 -; FLATSCR-NEXT: v_readlane_b32 s64, v40, 25 -; FLATSCR-NEXT: v_readlane_b32 s63, v40, 24 -; FLATSCR-NEXT: v_readlane_b32 s62, v40, 23 -; FLATSCR-NEXT: v_readlane_b32 s61, v40, 22 -; FLATSCR-NEXT: v_readlane_b32 s60, v40, 21 -; FLATSCR-NEXT: v_readlane_b32 s59, v40, 20 -; FLATSCR-NEXT: v_readlane_b32 s58, v40, 19 -; FLATSCR-NEXT: v_readlane_b32 s57, v40, 18 -; FLATSCR-NEXT: v_readlane_b32 s56, v40, 17 -; FLATSCR-NEXT: v_readlane_b32 s55, v40, 16 -; FLATSCR-NEXT: v_readlane_b32 s54, v40, 15 -; FLATSCR-NEXT: v_readlane_b32 s53, v40, 14 -; FLATSCR-NEXT: v_readlane_b32 s52, v40, 13 -; FLATSCR-NEXT: v_readlane_b32 s51, v40, 12 -; FLATSCR-NEXT: v_readlane_b32 s50, v40, 11 -; FLATSCR-NEXT: v_readlane_b32 s49, v40, 10 -; FLATSCR-NEXT: v_readlane_b32 s48, v40, 9 -; FLATSCR-NEXT: v_readlane_b32 s47, v40, 8 -; FLATSCR-NEXT: v_readlane_b32 s46, v40, 7 -; FLATSCR-NEXT: v_readlane_b32 s45, v40, 6 -; FLATSCR-NEXT: v_readlane_b32 s44, v40, 5 -; FLATSCR-NEXT: v_readlane_b32 s43, v40, 4 -; FLATSCR-NEXT: v_readlane_b32 s42, v40, 3 -; FLATSCR-NEXT: v_readlane_b32 s41, v40, 2 -; FLATSCR-NEXT: v_readlane_b32 s40, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s39, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s102, v40, 32 +; FLATSCR-NEXT: v_readlane_b32 s101, v40, 31 +; FLATSCR-NEXT: v_readlane_b32 s100, v40, 30 +; FLATSCR-NEXT: v_readlane_b32 s99, v40, 29 +; FLATSCR-NEXT: v_readlane_b32 s98, v40, 28 +; FLATSCR-NEXT: v_readlane_b32 s97, v40, 27 +; FLATSCR-NEXT: v_readlane_b32 s96, v40, 26 +; FLATSCR-NEXT: v_readlane_b32 s95, v40, 25 +; FLATSCR-NEXT: v_readlane_b32 s94, v40, 24 +; FLATSCR-NEXT: v_readlane_b32 s85, v40, 23 +; FLATSCR-NEXT: v_readlane_b32 s84, v40, 22 +; FLATSCR-NEXT: v_readlane_b32 s83, v40, 21 +; FLATSCR-NEXT: v_readlane_b32 s82, v40, 20 +; FLATSCR-NEXT: v_readlane_b32 s81, v40, 19 +; FLATSCR-NEXT: v_readlane_b32 s80, v40, 18 +; FLATSCR-NEXT: v_readlane_b32 s79, v40, 17 +; FLATSCR-NEXT: v_readlane_b32 s78, v40, 16 +; FLATSCR-NEXT: v_readlane_b32 s69, v40, 15 +; FLATSCR-NEXT: v_readlane_b32 s68, v40, 14 +; FLATSCR-NEXT: v_readlane_b32 s67, v40, 13 +; FLATSCR-NEXT: v_readlane_b32 s66, v40, 12 +; FLATSCR-NEXT: v_readlane_b32 s65, v40, 11 +; FLATSCR-NEXT: v_readlane_b32 s64, v40, 10 +; FLATSCR-NEXT: v_readlane_b32 s63, v40, 9 +; FLATSCR-NEXT: v_readlane_b32 s62, v40, 8 +; FLATSCR-NEXT: v_readlane_b32 s53, v40, 7 +; FLATSCR-NEXT: v_readlane_b32 s52, v40, 6 +; FLATSCR-NEXT: v_readlane_b32 s51, v40, 5 +; FLATSCR-NEXT: v_readlane_b32 s50, v40, 4 +; FLATSCR-NEXT: v_readlane_b32 s49, v40, 3 +; FLATSCR-NEXT: v_readlane_b32 s48, v40, 2 +; FLATSCR-NEXT: v_readlane_b32 s47, v40, 1 +; FLATSCR-NEXT: v_readlane_b32 s46, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload @@ -2361,81 +1731,48 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; MUBUF-LABEL: spill_fp_to_memory_scratch_reg_needed_mubuf_offset: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 s4, s33 +; MUBUF-NEXT: s_mov_b32 s38, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 -; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 -; MUBUF-NEXT: s_add_i32 s5, s33, 0x40100 -; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s5 ; 4-byte Folded Spill -; MUBUF-NEXT: s_mov_b64 exec, s[6:7] -; MUBUF-NEXT: v_writelane_b32 v39, s39, 0 -; MUBUF-NEXT: v_writelane_b32 v39, s40, 1 -; MUBUF-NEXT: v_writelane_b32 v39, s41, 2 -; MUBUF-NEXT: v_writelane_b32 v39, s42, 3 -; MUBUF-NEXT: v_writelane_b32 v39, s43, 4 -; MUBUF-NEXT: v_writelane_b32 v39, s44, 5 -; MUBUF-NEXT: v_writelane_b32 v39, s45, 6 -; MUBUF-NEXT: v_writelane_b32 v39, s46, 7 -; MUBUF-NEXT: v_writelane_b32 v39, s47, 8 -; MUBUF-NEXT: v_writelane_b32 v39, s48, 9 -; MUBUF-NEXT: v_writelane_b32 v39, s49, 10 -; MUBUF-NEXT: v_writelane_b32 v39, s50, 11 -; MUBUF-NEXT: v_writelane_b32 v39, s51, 12 -; MUBUF-NEXT: v_writelane_b32 v39, s52, 13 -; MUBUF-NEXT: v_writelane_b32 v39, s53, 14 -; MUBUF-NEXT: v_writelane_b32 v39, s54, 15 -; MUBUF-NEXT: v_writelane_b32 v39, s55, 16 -; MUBUF-NEXT: v_writelane_b32 v39, s56, 17 -; MUBUF-NEXT: v_writelane_b32 v39, s57, 18 -; MUBUF-NEXT: v_writelane_b32 v39, s58, 19 -; MUBUF-NEXT: v_writelane_b32 v39, s59, 20 -; MUBUF-NEXT: v_writelane_b32 v39, s60, 21 -; MUBUF-NEXT: v_writelane_b32 v39, s61, 22 -; MUBUF-NEXT: v_writelane_b32 v39, s62, 23 -; MUBUF-NEXT: v_writelane_b32 v39, s63, 24 -; MUBUF-NEXT: v_writelane_b32 v39, s64, 25 -; MUBUF-NEXT: v_writelane_b32 v39, s65, 26 -; MUBUF-NEXT: v_writelane_b32 v39, s66, 27 -; MUBUF-NEXT: v_writelane_b32 v39, s67, 28 -; MUBUF-NEXT: v_writelane_b32 v39, s68, 29 -; MUBUF-NEXT: v_writelane_b32 v39, s69, 30 -; MUBUF-NEXT: v_writelane_b32 v39, s70, 31 -; MUBUF-NEXT: v_writelane_b32 v39, s71, 32 -; MUBUF-NEXT: v_writelane_b32 v39, s72, 33 -; MUBUF-NEXT: v_writelane_b32 v39, s73, 34 -; MUBUF-NEXT: v_writelane_b32 v39, s74, 35 -; MUBUF-NEXT: v_writelane_b32 v39, s75, 36 -; MUBUF-NEXT: v_writelane_b32 v39, s76, 37 -; MUBUF-NEXT: v_writelane_b32 v39, s77, 38 -; MUBUF-NEXT: v_writelane_b32 v39, s78, 39 -; MUBUF-NEXT: v_writelane_b32 v39, s79, 40 -; MUBUF-NEXT: v_writelane_b32 v39, s80, 41 -; MUBUF-NEXT: v_writelane_b32 v39, s81, 42 -; MUBUF-NEXT: v_writelane_b32 v39, s82, 43 -; MUBUF-NEXT: v_writelane_b32 v39, s83, 44 -; MUBUF-NEXT: v_writelane_b32 v39, s84, 45 -; MUBUF-NEXT: v_writelane_b32 v39, s85, 46 -; MUBUF-NEXT: v_writelane_b32 v39, s86, 47 -; MUBUF-NEXT: v_writelane_b32 v39, s87, 48 -; MUBUF-NEXT: v_writelane_b32 v39, s88, 49 -; MUBUF-NEXT: v_writelane_b32 v39, s89, 50 -; MUBUF-NEXT: v_writelane_b32 v39, s90, 51 -; MUBUF-NEXT: v_writelane_b32 v39, s91, 52 -; MUBUF-NEXT: v_writelane_b32 v39, s92, 53 -; MUBUF-NEXT: v_writelane_b32 v39, s93, 54 -; MUBUF-NEXT: v_writelane_b32 v39, s94, 55 -; MUBUF-NEXT: v_writelane_b32 v39, s95, 56 -; MUBUF-NEXT: v_writelane_b32 v39, s96, 57 -; MUBUF-NEXT: v_writelane_b32 v39, s97, 58 -; MUBUF-NEXT: v_writelane_b32 v39, s98, 59 -; MUBUF-NEXT: v_writelane_b32 v39, s99, 60 -; MUBUF-NEXT: v_mov_b32_e32 v0, s4 -; MUBUF-NEXT: s_add_i32 s5, s33, 0x40200 -; MUBUF-NEXT: v_writelane_b32 v39, s100, 61 -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill -; MUBUF-NEXT: v_writelane_b32 v39, s101, 62 +; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100 +; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s6 ; 4-byte Folded Spill +; MUBUF-NEXT: s_mov_b64 exec, s[4:5] +; MUBUF-NEXT: v_writelane_b32 v39, s46, 0 +; MUBUF-NEXT: v_writelane_b32 v39, s47, 1 +; MUBUF-NEXT: v_writelane_b32 v39, s48, 2 +; MUBUF-NEXT: v_writelane_b32 v39, s49, 3 +; MUBUF-NEXT: v_writelane_b32 v39, s50, 4 +; MUBUF-NEXT: v_writelane_b32 v39, s51, 5 +; MUBUF-NEXT: v_writelane_b32 v39, s52, 6 +; MUBUF-NEXT: v_writelane_b32 v39, s53, 7 +; MUBUF-NEXT: v_writelane_b32 v39, s62, 8 +; MUBUF-NEXT: v_writelane_b32 v39, s63, 9 +; MUBUF-NEXT: v_writelane_b32 v39, s64, 10 +; MUBUF-NEXT: v_writelane_b32 v39, s65, 11 +; MUBUF-NEXT: v_writelane_b32 v39, s66, 12 +; MUBUF-NEXT: v_writelane_b32 v39, s67, 13 +; MUBUF-NEXT: v_writelane_b32 v39, s68, 14 +; MUBUF-NEXT: v_writelane_b32 v39, s69, 15 +; MUBUF-NEXT: v_writelane_b32 v39, s78, 16 +; MUBUF-NEXT: v_writelane_b32 v39, s79, 17 +; MUBUF-NEXT: v_writelane_b32 v39, s80, 18 +; MUBUF-NEXT: v_writelane_b32 v39, s81, 19 +; MUBUF-NEXT: v_writelane_b32 v39, s82, 20 +; MUBUF-NEXT: v_writelane_b32 v39, s83, 21 +; MUBUF-NEXT: v_writelane_b32 v39, s84, 22 +; MUBUF-NEXT: v_writelane_b32 v39, s85, 23 +; MUBUF-NEXT: v_writelane_b32 v39, s94, 24 +; MUBUF-NEXT: v_writelane_b32 v39, s95, 25 +; MUBUF-NEXT: v_writelane_b32 v39, s96, 26 +; MUBUF-NEXT: v_writelane_b32 v39, s97, 27 +; MUBUF-NEXT: v_writelane_b32 v39, s98, 28 +; MUBUF-NEXT: v_writelane_b32 v39, s99, 29 +; MUBUF-NEXT: v_writelane_b32 v39, s100, 30 +; MUBUF-NEXT: v_writelane_b32 v39, s101, 31 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1000 -; MUBUF-NEXT: v_writelane_b32 v39, s102, 63 +; MUBUF-NEXT: s_add_i32 s32, s32, 0x40300 +; MUBUF-NEXT: v_writelane_b32 v39, s102, 32 ; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s33 offen ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART @@ -2444,81 +1781,45 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber all VGPRs except CSR v40 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: s_add_i32 s5, s33, 0x40200 -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload -; MUBUF-NEXT: s_add_i32 s32, s32, 0x40400 -; MUBUF-NEXT: v_readlane_b32 s102, v39, 63 -; MUBUF-NEXT: v_readlane_b32 s101, v39, 62 -; MUBUF-NEXT: v_readlane_b32 s100, v39, 61 -; MUBUF-NEXT: v_readlane_b32 s99, v39, 60 -; MUBUF-NEXT: v_readlane_b32 s98, v39, 59 -; MUBUF-NEXT: v_readlane_b32 s97, v39, 58 -; MUBUF-NEXT: v_readlane_b32 s96, v39, 57 -; MUBUF-NEXT: v_readlane_b32 s95, v39, 56 -; MUBUF-NEXT: v_readlane_b32 s94, v39, 55 -; MUBUF-NEXT: v_readlane_b32 s93, v39, 54 -; MUBUF-NEXT: v_readlane_b32 s92, v39, 53 -; MUBUF-NEXT: v_readlane_b32 s91, v39, 52 -; MUBUF-NEXT: v_readlane_b32 s90, v39, 51 -; MUBUF-NEXT: v_readlane_b32 s89, v39, 50 -; MUBUF-NEXT: v_readlane_b32 s88, v39, 49 -; MUBUF-NEXT: v_readlane_b32 s87, v39, 48 -; MUBUF-NEXT: v_readlane_b32 s86, v39, 47 -; MUBUF-NEXT: v_readlane_b32 s85, v39, 46 -; MUBUF-NEXT: v_readlane_b32 s84, v39, 45 -; MUBUF-NEXT: v_readlane_b32 s83, v39, 44 -; MUBUF-NEXT: v_readlane_b32 s82, v39, 43 -; MUBUF-NEXT: v_readlane_b32 s81, v39, 42 -; MUBUF-NEXT: v_readlane_b32 s80, v39, 41 -; MUBUF-NEXT: v_readlane_b32 s79, v39, 40 -; MUBUF-NEXT: v_readlane_b32 s78, v39, 39 -; MUBUF-NEXT: v_readlane_b32 s77, v39, 38 -; MUBUF-NEXT: v_readlane_b32 s76, v39, 37 -; MUBUF-NEXT: v_readlane_b32 s75, v39, 36 -; MUBUF-NEXT: v_readlane_b32 s74, v39, 35 -; MUBUF-NEXT: v_readlane_b32 s73, v39, 34 -; MUBUF-NEXT: v_readlane_b32 s72, v39, 33 -; MUBUF-NEXT: v_readlane_b32 s71, v39, 32 -; MUBUF-NEXT: v_readlane_b32 s70, v39, 31 -; MUBUF-NEXT: v_readlane_b32 s69, v39, 30 -; MUBUF-NEXT: v_readlane_b32 s68, v39, 29 -; MUBUF-NEXT: v_readlane_b32 s67, v39, 28 -; MUBUF-NEXT: v_readlane_b32 s66, v39, 27 -; MUBUF-NEXT: v_readlane_b32 s65, v39, 26 -; MUBUF-NEXT: v_readlane_b32 s64, v39, 25 -; MUBUF-NEXT: v_readlane_b32 s63, v39, 24 -; MUBUF-NEXT: v_readlane_b32 s62, v39, 23 -; MUBUF-NEXT: v_readlane_b32 s61, v39, 22 -; MUBUF-NEXT: v_readlane_b32 s60, v39, 21 -; MUBUF-NEXT: v_readlane_b32 s59, v39, 20 -; MUBUF-NEXT: v_readlane_b32 s58, v39, 19 -; MUBUF-NEXT: v_readlane_b32 s57, v39, 18 -; MUBUF-NEXT: v_readlane_b32 s56, v39, 17 -; MUBUF-NEXT: v_readlane_b32 s55, v39, 16 -; MUBUF-NEXT: v_readlane_b32 s54, v39, 15 -; MUBUF-NEXT: v_readlane_b32 s53, v39, 14 -; MUBUF-NEXT: v_readlane_b32 s52, v39, 13 -; MUBUF-NEXT: v_readlane_b32 s51, v39, 12 -; MUBUF-NEXT: v_readlane_b32 s50, v39, 11 -; MUBUF-NEXT: v_readlane_b32 s49, v39, 10 -; MUBUF-NEXT: v_readlane_b32 s48, v39, 9 -; MUBUF-NEXT: v_readlane_b32 s47, v39, 8 -; MUBUF-NEXT: v_readlane_b32 s46, v39, 7 -; MUBUF-NEXT: v_readlane_b32 s45, v39, 6 -; MUBUF-NEXT: v_readlane_b32 s44, v39, 5 -; MUBUF-NEXT: v_readlane_b32 s43, v39, 4 -; MUBUF-NEXT: v_readlane_b32 s42, v39, 3 -; MUBUF-NEXT: v_readlane_b32 s41, v39, 2 -; MUBUF-NEXT: v_readlane_b32 s40, v39, 1 -; MUBUF-NEXT: v_readlane_b32 s39, v39, 0 +; MUBUF-NEXT: v_readlane_b32 s102, v39, 32 +; MUBUF-NEXT: v_readlane_b32 s101, v39, 31 +; MUBUF-NEXT: v_readlane_b32 s100, v39, 30 +; MUBUF-NEXT: v_readlane_b32 s99, v39, 29 +; MUBUF-NEXT: v_readlane_b32 s98, v39, 28 +; MUBUF-NEXT: v_readlane_b32 s97, v39, 27 +; MUBUF-NEXT: v_readlane_b32 s96, v39, 26 +; MUBUF-NEXT: v_readlane_b32 s95, v39, 25 +; MUBUF-NEXT: v_readlane_b32 s94, v39, 24 +; MUBUF-NEXT: v_readlane_b32 s85, v39, 23 +; MUBUF-NEXT: v_readlane_b32 s84, v39, 22 +; MUBUF-NEXT: v_readlane_b32 s83, v39, 21 +; MUBUF-NEXT: v_readlane_b32 s82, v39, 20 +; MUBUF-NEXT: v_readlane_b32 s81, v39, 19 +; MUBUF-NEXT: v_readlane_b32 s80, v39, 18 +; MUBUF-NEXT: v_readlane_b32 s79, v39, 17 +; MUBUF-NEXT: v_readlane_b32 s78, v39, 16 +; MUBUF-NEXT: v_readlane_b32 s69, v39, 15 +; MUBUF-NEXT: v_readlane_b32 s68, v39, 14 +; MUBUF-NEXT: v_readlane_b32 s67, v39, 13 +; MUBUF-NEXT: v_readlane_b32 s66, v39, 12 +; MUBUF-NEXT: v_readlane_b32 s65, v39, 11 +; MUBUF-NEXT: v_readlane_b32 s64, v39, 10 +; MUBUF-NEXT: v_readlane_b32 s63, v39, 9 +; MUBUF-NEXT: v_readlane_b32 s62, v39, 8 +; MUBUF-NEXT: v_readlane_b32 s53, v39, 7 +; MUBUF-NEXT: v_readlane_b32 s52, v39, 6 +; MUBUF-NEXT: v_readlane_b32 s51, v39, 5 +; MUBUF-NEXT: v_readlane_b32 s50, v39, 4 +; MUBUF-NEXT: v_readlane_b32 s49, v39, 3 +; MUBUF-NEXT: v_readlane_b32 s48, v39, 2 +; MUBUF-NEXT: v_readlane_b32 s47, v39, 1 +; MUBUF-NEXT: v_readlane_b32 s46, v39, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_readfirstlane_b32 s4, v0 -; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 -; MUBUF-NEXT: s_add_i32 s5, s33, 0x40100 -; MUBUF-NEXT: buffer_load_dword v39, off, s[0:3], s5 ; 4-byte Folded Reload -; MUBUF-NEXT: s_mov_b64 exec, s[6:7] -; MUBUF-NEXT: s_mov_b32 s33, s4 +; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100 +; MUBUF-NEXT: buffer_load_dword v39, off, s[0:3], s6 ; 4-byte Folded Reload +; MUBUF-NEXT: s_mov_b64 exec, s[4:5] +; MUBUF-NEXT: s_mov_b32 s33, s38 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; @@ -2531,73 +1832,42 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1004 ; FLATSCR-NEXT: scratch_store_dword off, v39, s1 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: v_writelane_b32 v39, s39, 0 -; FLATSCR-NEXT: v_writelane_b32 v39, s40, 1 -; FLATSCR-NEXT: v_writelane_b32 v39, s41, 2 -; FLATSCR-NEXT: v_writelane_b32 v39, s42, 3 -; FLATSCR-NEXT: v_writelane_b32 v39, s43, 4 -; FLATSCR-NEXT: v_writelane_b32 v39, s44, 5 -; FLATSCR-NEXT: v_writelane_b32 v39, s45, 6 -; FLATSCR-NEXT: v_writelane_b32 v39, s46, 7 -; FLATSCR-NEXT: v_writelane_b32 v39, s47, 8 -; FLATSCR-NEXT: v_writelane_b32 v39, s48, 9 -; FLATSCR-NEXT: v_writelane_b32 v39, s49, 10 -; FLATSCR-NEXT: v_writelane_b32 v39, s50, 11 -; FLATSCR-NEXT: v_writelane_b32 v39, s51, 12 -; FLATSCR-NEXT: v_writelane_b32 v39, s52, 13 -; FLATSCR-NEXT: v_writelane_b32 v39, s53, 14 -; FLATSCR-NEXT: v_writelane_b32 v39, s54, 15 -; FLATSCR-NEXT: v_writelane_b32 v39, s55, 16 -; FLATSCR-NEXT: v_writelane_b32 v39, s56, 17 -; FLATSCR-NEXT: v_writelane_b32 v39, s57, 18 -; FLATSCR-NEXT: v_writelane_b32 v39, s58, 19 -; FLATSCR-NEXT: v_writelane_b32 v39, s59, 20 -; FLATSCR-NEXT: v_writelane_b32 v39, s60, 21 -; FLATSCR-NEXT: v_writelane_b32 v39, s61, 22 -; FLATSCR-NEXT: v_writelane_b32 v39, s62, 23 -; FLATSCR-NEXT: v_writelane_b32 v39, s63, 24 -; FLATSCR-NEXT: v_writelane_b32 v39, s64, 25 -; FLATSCR-NEXT: v_writelane_b32 v39, s65, 26 -; FLATSCR-NEXT: v_writelane_b32 v39, s66, 27 -; FLATSCR-NEXT: v_writelane_b32 v39, s67, 28 -; FLATSCR-NEXT: v_writelane_b32 v39, s68, 29 -; FLATSCR-NEXT: v_writelane_b32 v39, s69, 30 -; FLATSCR-NEXT: v_writelane_b32 v39, s70, 31 -; FLATSCR-NEXT: v_writelane_b32 v39, s71, 32 -; FLATSCR-NEXT: v_writelane_b32 v39, s72, 33 -; FLATSCR-NEXT: v_writelane_b32 v39, s73, 34 -; FLATSCR-NEXT: v_writelane_b32 v39, s74, 35 -; FLATSCR-NEXT: v_writelane_b32 v39, s75, 36 -; FLATSCR-NEXT: v_writelane_b32 v39, s76, 37 -; FLATSCR-NEXT: v_writelane_b32 v39, s77, 38 -; FLATSCR-NEXT: v_writelane_b32 v39, s78, 39 -; FLATSCR-NEXT: v_writelane_b32 v39, s79, 40 -; FLATSCR-NEXT: v_writelane_b32 v39, s80, 41 -; FLATSCR-NEXT: v_writelane_b32 v39, s81, 42 -; FLATSCR-NEXT: v_writelane_b32 v39, s82, 43 -; FLATSCR-NEXT: v_writelane_b32 v39, s83, 44 -; FLATSCR-NEXT: v_writelane_b32 v39, s84, 45 -; FLATSCR-NEXT: v_writelane_b32 v39, s85, 46 -; FLATSCR-NEXT: v_writelane_b32 v39, s86, 47 -; FLATSCR-NEXT: v_writelane_b32 v39, s87, 48 -; FLATSCR-NEXT: v_writelane_b32 v39, s88, 49 -; FLATSCR-NEXT: v_writelane_b32 v39, s89, 50 -; FLATSCR-NEXT: v_writelane_b32 v39, s90, 51 -; FLATSCR-NEXT: v_writelane_b32 v39, s91, 52 -; FLATSCR-NEXT: v_writelane_b32 v39, s92, 53 -; FLATSCR-NEXT: v_writelane_b32 v39, s93, 54 -; FLATSCR-NEXT: v_writelane_b32 v39, s94, 55 -; FLATSCR-NEXT: v_writelane_b32 v39, s95, 56 -; FLATSCR-NEXT: v_writelane_b32 v39, s96, 57 -; FLATSCR-NEXT: v_writelane_b32 v39, s97, 58 -; FLATSCR-NEXT: v_writelane_b32 v39, s98, 59 -; FLATSCR-NEXT: v_writelane_b32 v39, s99, 60 +; FLATSCR-NEXT: v_writelane_b32 v39, s46, 0 +; FLATSCR-NEXT: v_writelane_b32 v39, s47, 1 +; FLATSCR-NEXT: v_writelane_b32 v39, s48, 2 +; FLATSCR-NEXT: v_writelane_b32 v39, s49, 3 +; FLATSCR-NEXT: v_writelane_b32 v39, s50, 4 +; FLATSCR-NEXT: v_writelane_b32 v39, s51, 5 +; FLATSCR-NEXT: v_writelane_b32 v39, s52, 6 +; FLATSCR-NEXT: v_writelane_b32 v39, s53, 7 +; FLATSCR-NEXT: v_writelane_b32 v39, s62, 8 +; FLATSCR-NEXT: v_writelane_b32 v39, s63, 9 +; FLATSCR-NEXT: v_writelane_b32 v39, s64, 10 +; FLATSCR-NEXT: v_writelane_b32 v39, s65, 11 +; FLATSCR-NEXT: v_writelane_b32 v39, s66, 12 +; FLATSCR-NEXT: v_writelane_b32 v39, s67, 13 +; FLATSCR-NEXT: v_writelane_b32 v39, s68, 14 +; FLATSCR-NEXT: v_writelane_b32 v39, s69, 15 +; FLATSCR-NEXT: v_writelane_b32 v39, s78, 16 +; FLATSCR-NEXT: v_writelane_b32 v39, s79, 17 +; FLATSCR-NEXT: v_writelane_b32 v39, s80, 18 +; FLATSCR-NEXT: v_writelane_b32 v39, s81, 19 +; FLATSCR-NEXT: v_writelane_b32 v39, s82, 20 +; FLATSCR-NEXT: v_writelane_b32 v39, s83, 21 +; FLATSCR-NEXT: v_writelane_b32 v39, s84, 22 +; FLATSCR-NEXT: v_writelane_b32 v39, s85, 23 +; FLATSCR-NEXT: v_writelane_b32 v39, s94, 24 +; FLATSCR-NEXT: v_writelane_b32 v39, s95, 25 +; FLATSCR-NEXT: v_writelane_b32 v39, s96, 26 +; FLATSCR-NEXT: v_writelane_b32 v39, s97, 27 +; FLATSCR-NEXT: v_writelane_b32 v39, s98, 28 +; FLATSCR-NEXT: v_writelane_b32 v39, s99, 29 ; FLATSCR-NEXT: s_addk_i32 s32, 0x100c -; FLATSCR-NEXT: v_writelane_b32 v39, s100, 61 -; FLATSCR-NEXT: v_writelane_b32 v39, s101, 62 +; FLATSCR-NEXT: v_writelane_b32 v39, s100, 30 +; FLATSCR-NEXT: v_writelane_b32 v39, s101, 31 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1000 -; FLATSCR-NEXT: v_writelane_b32 v39, s102, 63 +; FLATSCR-NEXT: v_writelane_b32 v39, s102, 32 ; FLATSCR-NEXT: scratch_store_dword off, v0, s1 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART @@ -2606,70 +1876,39 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber all VGPRs except CSR v40 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s102, v39, 63 -; FLATSCR-NEXT: v_readlane_b32 s101, v39, 62 -; FLATSCR-NEXT: v_readlane_b32 s100, v39, 61 -; FLATSCR-NEXT: v_readlane_b32 s99, v39, 60 -; FLATSCR-NEXT: v_readlane_b32 s98, v39, 59 -; FLATSCR-NEXT: v_readlane_b32 s97, v39, 58 -; FLATSCR-NEXT: v_readlane_b32 s96, v39, 57 -; FLATSCR-NEXT: v_readlane_b32 s95, v39, 56 -; FLATSCR-NEXT: v_readlane_b32 s94, v39, 55 -; FLATSCR-NEXT: v_readlane_b32 s93, v39, 54 -; FLATSCR-NEXT: v_readlane_b32 s92, v39, 53 -; FLATSCR-NEXT: v_readlane_b32 s91, v39, 52 -; FLATSCR-NEXT: v_readlane_b32 s90, v39, 51 -; FLATSCR-NEXT: v_readlane_b32 s89, v39, 50 -; FLATSCR-NEXT: v_readlane_b32 s88, v39, 49 -; FLATSCR-NEXT: v_readlane_b32 s87, v39, 48 -; FLATSCR-NEXT: v_readlane_b32 s86, v39, 47 -; FLATSCR-NEXT: v_readlane_b32 s85, v39, 46 -; FLATSCR-NEXT: v_readlane_b32 s84, v39, 45 -; FLATSCR-NEXT: v_readlane_b32 s83, v39, 44 -; FLATSCR-NEXT: v_readlane_b32 s82, v39, 43 -; FLATSCR-NEXT: v_readlane_b32 s81, v39, 42 -; FLATSCR-NEXT: v_readlane_b32 s80, v39, 41 -; FLATSCR-NEXT: v_readlane_b32 s79, v39, 40 -; FLATSCR-NEXT: v_readlane_b32 s78, v39, 39 -; FLATSCR-NEXT: v_readlane_b32 s77, v39, 38 -; FLATSCR-NEXT: v_readlane_b32 s76, v39, 37 -; FLATSCR-NEXT: v_readlane_b32 s75, v39, 36 -; FLATSCR-NEXT: v_readlane_b32 s74, v39, 35 -; FLATSCR-NEXT: v_readlane_b32 s73, v39, 34 -; FLATSCR-NEXT: v_readlane_b32 s72, v39, 33 -; FLATSCR-NEXT: v_readlane_b32 s71, v39, 32 -; FLATSCR-NEXT: v_readlane_b32 s70, v39, 31 -; FLATSCR-NEXT: v_readlane_b32 s69, v39, 30 -; FLATSCR-NEXT: v_readlane_b32 s68, v39, 29 -; FLATSCR-NEXT: v_readlane_b32 s67, v39, 28 -; FLATSCR-NEXT: v_readlane_b32 s66, v39, 27 -; FLATSCR-NEXT: v_readlane_b32 s65, v39, 26 -; FLATSCR-NEXT: v_readlane_b32 s64, v39, 25 -; FLATSCR-NEXT: v_readlane_b32 s63, v39, 24 -; FLATSCR-NEXT: v_readlane_b32 s62, v39, 23 -; FLATSCR-NEXT: v_readlane_b32 s61, v39, 22 -; FLATSCR-NEXT: v_readlane_b32 s60, v39, 21 -; FLATSCR-NEXT: v_readlane_b32 s59, v39, 20 -; FLATSCR-NEXT: v_readlane_b32 s58, v39, 19 -; FLATSCR-NEXT: v_readlane_b32 s57, v39, 18 -; FLATSCR-NEXT: v_readlane_b32 s56, v39, 17 -; FLATSCR-NEXT: v_readlane_b32 s55, v39, 16 -; FLATSCR-NEXT: v_readlane_b32 s54, v39, 15 -; FLATSCR-NEXT: v_readlane_b32 s53, v39, 14 -; FLATSCR-NEXT: v_readlane_b32 s52, v39, 13 -; FLATSCR-NEXT: v_readlane_b32 s51, v39, 12 -; FLATSCR-NEXT: v_readlane_b32 s50, v39, 11 -; FLATSCR-NEXT: v_readlane_b32 s49, v39, 10 -; FLATSCR-NEXT: v_readlane_b32 s48, v39, 9 -; FLATSCR-NEXT: v_readlane_b32 s47, v39, 8 -; FLATSCR-NEXT: v_readlane_b32 s46, v39, 7 -; FLATSCR-NEXT: v_readlane_b32 s45, v39, 6 -; FLATSCR-NEXT: v_readlane_b32 s44, v39, 5 -; FLATSCR-NEXT: v_readlane_b32 s43, v39, 4 -; FLATSCR-NEXT: v_readlane_b32 s42, v39, 3 -; FLATSCR-NEXT: v_readlane_b32 s41, v39, 2 -; FLATSCR-NEXT: v_readlane_b32 s40, v39, 1 -; FLATSCR-NEXT: v_readlane_b32 s39, v39, 0 +; FLATSCR-NEXT: v_readlane_b32 s102, v39, 32 +; FLATSCR-NEXT: v_readlane_b32 s101, v39, 31 +; FLATSCR-NEXT: v_readlane_b32 s100, v39, 30 +; FLATSCR-NEXT: v_readlane_b32 s99, v39, 29 +; FLATSCR-NEXT: v_readlane_b32 s98, v39, 28 +; FLATSCR-NEXT: v_readlane_b32 s97, v39, 27 +; FLATSCR-NEXT: v_readlane_b32 s96, v39, 26 +; FLATSCR-NEXT: v_readlane_b32 s95, v39, 25 +; FLATSCR-NEXT: v_readlane_b32 s94, v39, 24 +; FLATSCR-NEXT: v_readlane_b32 s85, v39, 23 +; FLATSCR-NEXT: v_readlane_b32 s84, v39, 22 +; FLATSCR-NEXT: v_readlane_b32 s83, v39, 21 +; FLATSCR-NEXT: v_readlane_b32 s82, v39, 20 +; FLATSCR-NEXT: v_readlane_b32 s81, v39, 19 +; FLATSCR-NEXT: v_readlane_b32 s80, v39, 18 +; FLATSCR-NEXT: v_readlane_b32 s79, v39, 17 +; FLATSCR-NEXT: v_readlane_b32 s78, v39, 16 +; FLATSCR-NEXT: v_readlane_b32 s69, v39, 15 +; FLATSCR-NEXT: v_readlane_b32 s68, v39, 14 +; FLATSCR-NEXT: v_readlane_b32 s67, v39, 13 +; FLATSCR-NEXT: v_readlane_b32 s66, v39, 12 +; FLATSCR-NEXT: v_readlane_b32 s65, v39, 11 +; FLATSCR-NEXT: v_readlane_b32 s64, v39, 10 +; FLATSCR-NEXT: v_readlane_b32 s63, v39, 9 +; FLATSCR-NEXT: v_readlane_b32 s62, v39, 8 +; FLATSCR-NEXT: v_readlane_b32 s53, v39, 7 +; FLATSCR-NEXT: v_readlane_b32 s52, v39, 6 +; FLATSCR-NEXT: v_readlane_b32 s51, v39, 5 +; FLATSCR-NEXT: v_readlane_b32 s50, v39, 4 +; FLATSCR-NEXT: v_readlane_b32 s49, v39, 3 +; FLATSCR-NEXT: v_readlane_b32 s48, v39, 2 +; FLATSCR-NEXT: v_readlane_b32 s47, v39, 1 +; FLATSCR-NEXT: v_readlane_b32 s46, v39, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1004 diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir index a14d515688a8b..0a3bf35427e24 100644 --- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir +++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir @@ -14,15 +14,13 @@ body: | ; CHECK-LABEL: name: def_csr_sgpr ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $sgpr42, $sgpr43, $sgpr46, $sgpr47 + ; CHECK-NEXT: liveins: $sgpr46, $sgpr47 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr42, 0, $vgpr0 - ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr43, 1, $vgpr0 - ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr46, 2, $vgpr0 - ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr47, 3, $vgpr0 + ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr46, 0, $vgpr0 + ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr47, 1, $vgpr0 ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll index 4d9c85ef99dcd..4c2e3f426d29f 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll @@ -1321,19 +1321,19 @@ bb: define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspace(3) %arg) { ; CI-LABEL: ds_read_call_read: ; CI: ; %bb.0: -; CI-NEXT: s_getpc_b64 s[40:41] -; CI-NEXT: s_mov_b32 s40, s0 -; CI-NEXT: s_load_dwordx4 s[40:43], s[40:41], 0x0 +; CI-NEXT: s_getpc_b64 s[64:65] +; CI-NEXT: s_mov_b32 s64, s0 +; CI-NEXT: s_load_dwordx4 s[64:67], s[64:65], 0x0 ; CI-NEXT: s_mov_b32 s14, s10 ; CI-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_mov_b32 s12, s8 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_add_u32 s40, s40, s11 +; CI-NEXT: s_add_u32 s64, s64, s11 ; CI-NEXT: s_mov_b64 s[10:11], s[6:7] -; CI-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0 +; CI-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x0 ; CI-NEXT: s_load_dword s6, s[4:5], 0x2 -; CI-NEXT: s_addc_u32 s41, s41, 0 +; CI-NEXT: s_addc_u32 s65, s65, 0 ; CI-NEXT: s_add_u32 s8, s4, 12 ; CI-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; CI-NEXT: s_mov_b32 s13, s9 @@ -1345,36 +1345,36 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac ; CI-NEXT: v_or_b32_e32 v0, v0, v1 ; CI-NEXT: s_mov_b64 s[4:5], s[0:1] ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] -; CI-NEXT: s_mov_b64 s[0:1], s[40:41] +; CI-NEXT: s_mov_b64 s[0:1], s[64:65] ; CI-NEXT: s_mov_b32 s17, void_func_void@abs32@hi ; CI-NEXT: s_mov_b32 s16, void_func_void@abs32@lo ; CI-NEXT: v_or_b32_e32 v31, v0, v2 -; CI-NEXT: s_mov_b64 s[2:3], s[42:43] +; CI-NEXT: s_mov_b64 s[2:3], s[66:67] ; CI-NEXT: s_mov_b32 s32, 0 -; CI-NEXT: s_mov_b32 s39, 0xf000 -; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s51, 0xf000 +; CI-NEXT: s_mov_b32 s50, -1 ; CI-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CI-NEXT: ds_read_b32 v0, v40 offset:4 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_i32_e32 v0, vcc, v41, v0 -; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 +; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0 ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: ds_read_call_read: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_getpc_b64 s[36:37] -; GFX9-NEXT: s_mov_b32 s36, s0 -; GFX9-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x0 +; GFX9-NEXT: s_getpc_b64 s[48:49] +; GFX9-NEXT: s_mov_b32 s48, s0 +; GFX9-NEXT: s_load_dwordx4 s[48:51], s[48:49], 0x0 ; GFX9-NEXT: s_mov_b32 s14, s10 ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_mov_b32 s13, s9 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_add_u32 s48, s48, s11 ; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX9-NEXT: s_load_dword s6, s[4:5], 0x8 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_add_u32 s8, s4, 12 ; GFX9-NEXT: s_addc_u32 s9, s5, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1383,11 +1383,11 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_mov_b32 s17, void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s16, void_func_void@abs32@lo ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll index 8b02bdbb70b7b..f671ea5f10cd8 100644 --- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll @@ -28,63 +28,63 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: v_writelane_b32 v41, s35, 3 ; CHECK-NEXT: v_writelane_b32 v41, s36, 4 ; CHECK-NEXT: v_writelane_b32 v41, s37, 5 -; CHECK-NEXT: v_writelane_b32 v41, s38, 6 -; CHECK-NEXT: v_writelane_b32 v41, s39, 7 -; CHECK-NEXT: v_writelane_b32 v41, s40, 8 -; CHECK-NEXT: v_writelane_b32 v41, s41, 9 -; CHECK-NEXT: v_writelane_b32 v41, s42, 10 -; CHECK-NEXT: v_writelane_b32 v41, s43, 11 -; CHECK-NEXT: v_writelane_b32 v41, s44, 12 +; CHECK-NEXT: v_writelane_b32 v41, s46, 6 +; CHECK-NEXT: v_writelane_b32 v41, s47, 7 +; CHECK-NEXT: v_writelane_b32 v41, s48, 8 +; CHECK-NEXT: v_writelane_b32 v41, s49, 9 +; CHECK-NEXT: v_writelane_b32 v41, s50, 10 +; CHECK-NEXT: v_writelane_b32 v41, s51, 11 +; CHECK-NEXT: v_writelane_b32 v41, s52, 12 ; CHECK-NEXT: s_addk_i32 s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v41, s45, 13 -; CHECK-NEXT: v_writelane_b32 v41, s46, 14 -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: v_writelane_b32 v41, s53, 13 +; CHECK-NEXT: v_writelane_b32 v41, s62, 14 +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- undef ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: .loc 1 49 9 prologue_end ; dummy:49:9 ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, __kmpc_alloc_shared@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, __kmpc_alloc_shared@gotpcrel32@hi+12 -; CHECK-NEXT: v_writelane_b32 v41, s47, 15 -; CHECK-NEXT: s_load_dwordx2 s[46:47], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: v_writelane_b32 v41, s63, 15 +; CHECK-NEXT: s_load_dwordx2 s[62:63], s[4:5], 0x0 +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: v_mov_b32_e32 v40, v31 -; CHECK-NEXT: s_mov_b32 s42, s15 -; CHECK-NEXT: s_mov_b32 s43, s14 -; CHECK-NEXT: s_mov_b32 s44, s13 -; CHECK-NEXT: s_mov_b32 s45, s12 +; CHECK-NEXT: s_mov_b32 s50, s15 +; CHECK-NEXT: s_mov_b32 s51, s14 +; CHECK-NEXT: s_mov_b32 s52, s13 +; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] +; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[46:47] -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_swappc_b64 s[30:31], s[62:63] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s45 -; CHECK-NEXT: s_mov_b32 s13, s44 -; CHECK-NEXT: s_mov_b32 s14, s43 -; CHECK-NEXT: s_mov_b32 s15, s42 +; CHECK-NEXT: s_mov_b32 s12, s53 +; CHECK-NEXT: s_mov_b32 s13, s52 +; CHECK-NEXT: s_mov_b32 s14, s51 +; CHECK-NEXT: s_mov_b32 s15, s50 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 -; CHECK-NEXT: s_swappc_b64 s[30:31], s[46:47] +; CHECK-NEXT: s_swappc_b64 s[30:31], s[62:63] ; CHECK-NEXT: .Ltmp1: ; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- [$vgpr0_vgpr1+0] ; CHECK-NEXT: .loc 1 0 9 is_stmt 0 ; dummy:0:9 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: flat_store_dword v[0:1], v2 -; CHECK-NEXT: v_readlane_b32 s47, v41, 15 -; CHECK-NEXT: v_readlane_b32 s46, v41, 14 -; CHECK-NEXT: v_readlane_b32 s45, v41, 13 -; CHECK-NEXT: v_readlane_b32 s44, v41, 12 -; CHECK-NEXT: v_readlane_b32 s43, v41, 11 -; CHECK-NEXT: v_readlane_b32 s42, v41, 10 -; CHECK-NEXT: v_readlane_b32 s41, v41, 9 -; CHECK-NEXT: v_readlane_b32 s40, v41, 8 -; CHECK-NEXT: v_readlane_b32 s39, v41, 7 -; CHECK-NEXT: v_readlane_b32 s38, v41, 6 +; CHECK-NEXT: v_readlane_b32 s63, v41, 15 +; CHECK-NEXT: v_readlane_b32 s62, v41, 14 +; CHECK-NEXT: v_readlane_b32 s53, v41, 13 +; CHECK-NEXT: v_readlane_b32 s52, v41, 12 +; CHECK-NEXT: v_readlane_b32 s51, v41, 11 +; CHECK-NEXT: v_readlane_b32 s50, v41, 10 +; CHECK-NEXT: v_readlane_b32 s49, v41, 9 +; CHECK-NEXT: v_readlane_b32 s48, v41, 8 +; CHECK-NEXT: v_readlane_b32 s47, v41, 7 +; CHECK-NEXT: v_readlane_b32 s46, v41, 6 ; CHECK-NEXT: v_readlane_b32 s37, v41, 5 ; CHECK-NEXT: v_readlane_b32 s36, v41, 4 ; CHECK-NEXT: v_readlane_b32 s35, v41, 3 diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir index 0714def30053d..4dd03a17f7caa 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir @@ -142,9 +142,8 @@ body: | ; GFX1100-NEXT: renamable $sgpr20 = S_MOV_B32 killed $sgpr22 ; GFX1100-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23 ; GFX1100-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27 - ; GFX1100-NEXT: $sgpr32 = S_ADD_I32 $sgpr32, 8, implicit-def $scc - ; GFX1100-NEXT: renamable $sgpr31 = S_MOV_B32 $sgpr32 - ; GFX1100-NEXT: $sgpr32 = S_ADD_I32 $sgpr32, -8, implicit-def $scc + ; GFX1100-NEXT: $sgpr38 = S_ADD_I32 $sgpr32, 8, implicit-def $scc + ; GFX1100-NEXT: renamable $sgpr31 = S_MOV_B32 killed $sgpr38 ; GFX1100-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec ; GFX1100-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec ; GFX1100-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec @@ -168,9 +167,8 @@ body: | ; GFX1200-NEXT: renamable $sgpr20 = S_MOV_B32 killed $sgpr22 ; GFX1200-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23 ; GFX1200-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27 - ; GFX1200-NEXT: $sgpr32 = S_ADD_I32 $sgpr32, 8, implicit-def $scc - ; GFX1200-NEXT: renamable $sgpr31 = S_MOV_B32 $sgpr32 - ; GFX1200-NEXT: $sgpr32 = S_ADD_I32 $sgpr32, -8, implicit-def $scc + ; GFX1200-NEXT: $sgpr38 = S_ADD_I32 $sgpr32, 8, implicit-def $scc + ; GFX1200-NEXT: renamable $sgpr31 = S_MOV_B32 killed $sgpr38 ; GFX1200-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec ; GFX1200-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec ; GFX1200-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec @@ -706,8 +704,9 @@ body: | ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc - ; GFX8-NEXT: $vgpr0 = V_MAD_U32_U24_e64 24, 64, $sgpr32, 0, implicit $exec - ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $vgpr0, implicit $exec + ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; GFX8-NEXT: $sgpr4 = S_MOV_B32 24 + ; GFX8-NEXT: $vgpr0, dead $sgpr70_sgpr71 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec ; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 @@ -810,10 +809,10 @@ body: | ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX1100-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1100-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc - ; GFX1100-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc - ; GFX1100-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc - ; GFX1100-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi - ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $vcc_hi + ; GFX1100-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc + ; GFX1100-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc + ; GFX1100-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70 + ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70 ; GFX1100-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1100-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1100-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -837,10 +836,10 @@ body: | ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX1200-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1200-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc - ; GFX1200-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc - ; GFX1200-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc - ; GFX1200-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi - ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $vcc_hi + ; GFX1200-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc + ; GFX1200-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc + ; GFX1200-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70 + ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70 ; GFX1200-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1200-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1200-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -900,9 +899,9 @@ body: | ; GFX8-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc - ; GFX8-NEXT: $vgpr0 = V_MOV_B32_e32 68, implicit $exec - ; GFX8-NEXT: $vgpr0 = V_MAD_U32_U24_e64 killed $vgpr0, 64, $sgpr32, 0, implicit $exec - ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $vgpr0, implicit $exec + ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec + ; GFX8-NEXT: $sgpr4 = S_MOV_B32 68 + ; GFX8-NEXT: $vgpr0, dead $sgpr70_sgpr71 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec ; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 @@ -1005,10 +1004,10 @@ body: | ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX1100-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1100-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc - ; GFX1100-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc - ; GFX1100-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc - ; GFX1100-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi - ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $vcc_hi + ; GFX1100-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc + ; GFX1100-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc + ; GFX1100-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70 + ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70 ; GFX1100-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1100-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1100-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -1032,10 +1031,10 @@ body: | ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX1200-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1200-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc - ; GFX1200-NEXT: $vcc_hi = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc - ; GFX1200-NEXT: S_BITCMP1_B32 $vcc_hi, 0, implicit-def $scc - ; GFX1200-NEXT: $vcc_hi = S_BITSET0_B32 0, $vcc_hi - ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $vcc_hi + ; GFX1200-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc + ; GFX1200-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc + ; GFX1200-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70 + ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70 ; GFX1200-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1200-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1200-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 diff --git a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll index 32f255df82499..ff2fb986e7828 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll @@ -2060,9 +2060,9 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s29, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 vcc, -1 +; GFX9-NEXT: s_or_saveexec_b64 s[38:39], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, vcc +; GFX9-NEXT: s_mov_b64 exec, s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v2, s28 ; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:48 ; GFX9-NEXT: v_mov_b32_e32 v5, s27 diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll index 22257d3eba7d6..3e84aa37fbcaa 100644 --- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll @@ -336,14 +336,14 @@ define amdgpu_kernel void @indirect_2_level_use_stack() #0 { ; GCN-LABEL: {{^}}multi_call_use_use_stack: ; GCN: .set multi_call_use_use_stack.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr) ; GCN: .set multi_call_use_use_stack.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr) -; GCN: .set multi_call_use_use_stack.numbered_sgpr, max(44, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr) +; GCN: .set multi_call_use_use_stack.numbered_sgpr, max(52, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr) ; GCN: .set multi_call_use_use_stack.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size)) ; GCN: .set multi_call_use_use_stack.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc) ; GCN: .set multi_call_use_use_stack.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch) ; GCN: .set multi_call_use_use_stack.has_dyn_sized_stack, or(0, use_stack0.has_dyn_sized_stack, use_stack1.has_dyn_sized_stack) ; GCN: .set multi_call_use_use_stack.has_recursion, or(0, use_stack0.has_recursion, use_stack1.has_recursion) ; GCN: .set multi_call_use_use_stack.has_indirect_call, or(0, use_stack0.has_indirect_call, use_stack1.has_indirect_call) -; GCN: TotalNumSgprs: 50 +; GCN: TotalNumSgprs: 58 ; GCN: NumVgprs: 41 ; GCN: ScratchSize: 2052 define amdgpu_kernel void @multi_call_use_use_stack() #0 { @@ -357,7 +357,7 @@ declare void @external() #0 ; GCN-LABEL: {{^}}multi_call_with_external: ; GCN: .set multi_call_with_external.num_vgpr, max(41, amdgpu.max_num_vgpr) ; GCN: .set multi_call_with_external.num_agpr, max(0, amdgpu.max_num_agpr) -; GCN: .set multi_call_with_external.numbered_sgpr, max(44, amdgpu.max_num_sgpr) +; GCN: .set multi_call_with_external.numbered_sgpr, max(52, amdgpu.max_num_sgpr) ; GCN: .set multi_call_with_external.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size)) ; GCN: .set multi_call_with_external.uses_vcc, 1 ; GCN: .set multi_call_with_external.uses_flat_scratch, 1 @@ -377,7 +377,7 @@ define amdgpu_kernel void @multi_call_with_external() #0 { ; GCN-LABEL: {{^}}multi_call_with_external_and_duplicates: ; GCN: .set multi_call_with_external_and_duplicates.num_vgpr, max(41, amdgpu.max_num_vgpr) ; GCN: .set multi_call_with_external_and_duplicates.num_agpr, max(0, amdgpu.max_num_agpr) -; GCN: .set multi_call_with_external_and_duplicates.numbered_sgpr, max(46, amdgpu.max_num_sgpr) +; GCN: .set multi_call_with_external_and_duplicates.numbered_sgpr, max(54, amdgpu.max_num_sgpr) ; GCN: .set multi_call_with_external_and_duplicates.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size)) ; GCN: .set multi_call_with_external_and_duplicates.uses_vcc, 1 ; GCN: .set multi_call_with_external_and_duplicates.uses_flat_scratch, 1 @@ -594,7 +594,7 @@ define amdgpu_kernel void @usage_multi_stage_recurse_noattrs(i32 %n) #0 { ; GCN-LABEL: {{^}}multi_call_with_multi_stage_recurse: ; GCN: .set multi_call_with_multi_stage_recurse.num_vgpr, max(41, use_stack0.num_vgpr, use_stack1.num_vgpr, multi_stage_recurse1.num_vgpr) ; GCN: .set multi_call_with_multi_stage_recurse.num_agpr, max(0, use_stack0.num_agpr, use_stack1.num_agpr, multi_stage_recurse1.num_agpr) -; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(45, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr) +; GCN: .set multi_call_with_multi_stage_recurse.numbered_sgpr, max(53, use_stack0.numbered_sgpr, use_stack1.numbered_sgpr, multi_stage_recurse1.numbered_sgpr) ; GCN: .set multi_call_with_multi_stage_recurse.private_seg_size, 0+(max(use_stack0.private_seg_size, use_stack1.private_seg_size, multi_stage_recurse1.private_seg_size)) ; GCN: .set multi_call_with_multi_stage_recurse.uses_vcc, or(1, use_stack0.uses_vcc, use_stack1.uses_vcc, multi_stage_recurse1.uses_vcc) ; GCN: .set multi_call_with_multi_stage_recurse.uses_flat_scratch, or(1, use_stack0.uses_flat_scratch, use_stack1.uses_flat_scratch, multi_stage_recurse1.uses_flat_scratch) diff --git a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll index 1ad365df2e8a8..be12d4be59106 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll @@ -40,12 +40,44 @@ define amdgpu_gfx void @gfx_func() { ; SDAG-NEXT: v_writelane_b32 v40, s28, 24 ; SDAG-NEXT: v_writelane_b32 v40, s29, 25 ; SDAG-NEXT: v_writelane_b32 v40, s30, 26 +; SDAG-NEXT: v_writelane_b32 v40, s31, 27 +; SDAG-NEXT: v_writelane_b32 v40, s70, 28 +; SDAG-NEXT: v_writelane_b32 v40, s71, 29 +; SDAG-NEXT: v_writelane_b32 v40, s72, 30 +; SDAG-NEXT: v_writelane_b32 v40, s73, 31 +; SDAG-NEXT: v_writelane_b32 v40, s74, 32 +; SDAG-NEXT: v_writelane_b32 v40, s75, 33 +; SDAG-NEXT: v_writelane_b32 v40, s76, 34 +; SDAG-NEXT: v_writelane_b32 v40, s77, 35 +; SDAG-NEXT: v_writelane_b32 v40, s86, 36 +; SDAG-NEXT: v_writelane_b32 v40, s87, 37 +; SDAG-NEXT: v_writelane_b32 v40, s88, 38 +; SDAG-NEXT: v_writelane_b32 v40, s89, 39 +; SDAG-NEXT: v_writelane_b32 v40, s90, 40 +; SDAG-NEXT: v_writelane_b32 v40, s91, 41 +; SDAG-NEXT: v_writelane_b32 v40, s92, 42 ; SDAG-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi ; SDAG-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo ; SDAG-NEXT: s_mov_b64 s[8:9], 0 ; SDAG-NEXT: s_addk_i32 s32, 0x400 -; SDAG-NEXT: v_writelane_b32 v40, s31, 27 +; SDAG-NEXT: v_writelane_b32 v40, s93, 43 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[34:35] +; SDAG-NEXT: v_readlane_b32 s93, v40, 43 +; SDAG-NEXT: v_readlane_b32 s92, v40, 42 +; SDAG-NEXT: v_readlane_b32 s91, v40, 41 +; SDAG-NEXT: v_readlane_b32 s90, v40, 40 +; SDAG-NEXT: v_readlane_b32 s89, v40, 39 +; SDAG-NEXT: v_readlane_b32 s88, v40, 38 +; SDAG-NEXT: v_readlane_b32 s87, v40, 37 +; SDAG-NEXT: v_readlane_b32 s86, v40, 36 +; SDAG-NEXT: v_readlane_b32 s77, v40, 35 +; SDAG-NEXT: v_readlane_b32 s76, v40, 34 +; SDAG-NEXT: v_readlane_b32 s75, v40, 33 +; SDAG-NEXT: v_readlane_b32 s74, v40, 32 +; SDAG-NEXT: v_readlane_b32 s73, v40, 31 +; SDAG-NEXT: v_readlane_b32 s72, v40, 30 +; SDAG-NEXT: v_readlane_b32 s71, v40, 29 +; SDAG-NEXT: v_readlane_b32 s70, v40, 28 ; SDAG-NEXT: v_readlane_b32 s31, v40, 27 ; SDAG-NEXT: v_readlane_b32 s30, v40, 26 ; SDAG-NEXT: v_readlane_b32 s29, v40, 25 @@ -117,12 +149,44 @@ define amdgpu_gfx void @gfx_func() { ; GISEL-NEXT: v_writelane_b32 v40, s28, 24 ; GISEL-NEXT: v_writelane_b32 v40, s29, 25 ; GISEL-NEXT: v_writelane_b32 v40, s30, 26 +; GISEL-NEXT: v_writelane_b32 v40, s31, 27 +; GISEL-NEXT: v_writelane_b32 v40, s70, 28 +; GISEL-NEXT: v_writelane_b32 v40, s71, 29 +; GISEL-NEXT: v_writelane_b32 v40, s72, 30 +; GISEL-NEXT: v_writelane_b32 v40, s73, 31 +; GISEL-NEXT: v_writelane_b32 v40, s74, 32 +; GISEL-NEXT: v_writelane_b32 v40, s75, 33 +; GISEL-NEXT: v_writelane_b32 v40, s76, 34 +; GISEL-NEXT: v_writelane_b32 v40, s77, 35 +; GISEL-NEXT: v_writelane_b32 v40, s86, 36 +; GISEL-NEXT: v_writelane_b32 v40, s87, 37 +; GISEL-NEXT: v_writelane_b32 v40, s88, 38 +; GISEL-NEXT: v_writelane_b32 v40, s89, 39 +; GISEL-NEXT: v_writelane_b32 v40, s90, 40 +; GISEL-NEXT: v_writelane_b32 v40, s91, 41 +; GISEL-NEXT: v_writelane_b32 v40, s92, 42 ; GISEL-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo ; GISEL-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi ; GISEL-NEXT: s_mov_b64 s[8:9], 0 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s31, 27 +; GISEL-NEXT: v_writelane_b32 v40, s93, 43 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GISEL-NEXT: v_readlane_b32 s93, v40, 43 +; GISEL-NEXT: v_readlane_b32 s92, v40, 42 +; GISEL-NEXT: v_readlane_b32 s91, v40, 41 +; GISEL-NEXT: v_readlane_b32 s90, v40, 40 +; GISEL-NEXT: v_readlane_b32 s89, v40, 39 +; GISEL-NEXT: v_readlane_b32 s88, v40, 38 +; GISEL-NEXT: v_readlane_b32 s87, v40, 37 +; GISEL-NEXT: v_readlane_b32 s86, v40, 36 +; GISEL-NEXT: v_readlane_b32 s77, v40, 35 +; GISEL-NEXT: v_readlane_b32 s76, v40, 34 +; GISEL-NEXT: v_readlane_b32 s75, v40, 33 +; GISEL-NEXT: v_readlane_b32 s74, v40, 32 +; GISEL-NEXT: v_readlane_b32 s73, v40, 31 +; GISEL-NEXT: v_readlane_b32 s72, v40, 30 +; GISEL-NEXT: v_readlane_b32 s71, v40, 29 +; GISEL-NEXT: v_readlane_b32 s70, v40, 28 ; GISEL-NEXT: v_readlane_b32 s31, v40, 27 ; GISEL-NEXT: v_readlane_b32 s30, v40, 26 ; GISEL-NEXT: v_readlane_b32 s29, v40, 25 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll index 2e3ca34af4c74..ef230e4b877b0 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -9091,66 +9091,34 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s35, 3 ; GFX9-NEXT: v_writelane_b32 v40, s36, 4 ; GFX9-NEXT: v_writelane_b32 v40, s37, 5 -; GFX9-NEXT: v_writelane_b32 v40, s38, 6 -; GFX9-NEXT: v_writelane_b32 v40, s39, 7 -; GFX9-NEXT: v_writelane_b32 v40, s40, 8 -; GFX9-NEXT: v_writelane_b32 v40, s41, 9 -; GFX9-NEXT: v_writelane_b32 v40, s42, 10 -; GFX9-NEXT: v_writelane_b32 v40, s43, 11 -; GFX9-NEXT: v_writelane_b32 v40, s44, 12 -; GFX9-NEXT: v_writelane_b32 v40, s45, 13 -; GFX9-NEXT: v_writelane_b32 v40, s46, 14 -; GFX9-NEXT: v_writelane_b32 v40, s47, 15 -; GFX9-NEXT: v_writelane_b32 v40, s48, 16 -; GFX9-NEXT: v_writelane_b32 v40, s49, 17 -; GFX9-NEXT: v_writelane_b32 v40, s50, 18 -; GFX9-NEXT: v_writelane_b32 v40, s51, 19 -; GFX9-NEXT: v_writelane_b32 v40, s52, 20 -; GFX9-NEXT: v_writelane_b32 v40, s53, 21 -; GFX9-NEXT: v_writelane_b32 v40, s54, 22 -; GFX9-NEXT: v_writelane_b32 v40, s55, 23 -; GFX9-NEXT: v_writelane_b32 v40, s56, 24 -; GFX9-NEXT: v_writelane_b32 v40, s57, 25 -; GFX9-NEXT: v_writelane_b32 v40, s58, 26 -; GFX9-NEXT: v_writelane_b32 v40, s59, 27 -; GFX9-NEXT: v_writelane_b32 v40, s60, 28 -; GFX9-NEXT: v_writelane_b32 v40, s61, 29 +; GFX9-NEXT: v_writelane_b32 v40, s46, 6 +; GFX9-NEXT: v_writelane_b32 v40, s47, 7 +; GFX9-NEXT: v_writelane_b32 v40, s48, 8 +; GFX9-NEXT: v_writelane_b32 v40, s49, 9 +; GFX9-NEXT: v_writelane_b32 v40, s50, 10 +; GFX9-NEXT: v_writelane_b32 v40, s51, 11 +; GFX9-NEXT: v_writelane_b32 v40, s52, 12 +; GFX9-NEXT: v_writelane_b32 v40, s53, 13 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s62, 30 +; GFX9-NEXT: v_writelane_b32 v40, s62, 14 ; GFX9-NEXT: s_mov_b32 s5, byval_align16_f64_arg@abs32@hi ; GFX9-NEXT: s_mov_b32 s4, byval_align16_f64_arg@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s63, 31 +; GFX9-NEXT: v_writelane_b32 v40, s63, 15 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s63, v40, 31 -; GFX9-NEXT: v_readlane_b32 s62, v40, 30 -; GFX9-NEXT: v_readlane_b32 s61, v40, 29 -; GFX9-NEXT: v_readlane_b32 s60, v40, 28 -; GFX9-NEXT: v_readlane_b32 s59, v40, 27 -; GFX9-NEXT: v_readlane_b32 s58, v40, 26 -; GFX9-NEXT: v_readlane_b32 s57, v40, 25 -; GFX9-NEXT: v_readlane_b32 s56, v40, 24 -; GFX9-NEXT: v_readlane_b32 s55, v40, 23 -; GFX9-NEXT: v_readlane_b32 s54, v40, 22 -; GFX9-NEXT: v_readlane_b32 s53, v40, 21 -; GFX9-NEXT: v_readlane_b32 s52, v40, 20 -; GFX9-NEXT: v_readlane_b32 s51, v40, 19 -; GFX9-NEXT: v_readlane_b32 s50, v40, 18 -; GFX9-NEXT: v_readlane_b32 s49, v40, 17 -; GFX9-NEXT: v_readlane_b32 s48, v40, 16 -; GFX9-NEXT: v_readlane_b32 s47, v40, 15 -; GFX9-NEXT: v_readlane_b32 s46, v40, 14 -; GFX9-NEXT: v_readlane_b32 s45, v40, 13 -; GFX9-NEXT: v_readlane_b32 s44, v40, 12 -; GFX9-NEXT: v_readlane_b32 s43, v40, 11 -; GFX9-NEXT: v_readlane_b32 s42, v40, 10 -; GFX9-NEXT: v_readlane_b32 s41, v40, 9 -; GFX9-NEXT: v_readlane_b32 s40, v40, 8 -; GFX9-NEXT: v_readlane_b32 s39, v40, 7 -; GFX9-NEXT: v_readlane_b32 s38, v40, 6 +; GFX9-NEXT: v_readlane_b32 s63, v40, 15 +; GFX9-NEXT: v_readlane_b32 s62, v40, 14 +; GFX9-NEXT: v_readlane_b32 s53, v40, 13 +; GFX9-NEXT: v_readlane_b32 s52, v40, 12 +; GFX9-NEXT: v_readlane_b32 s51, v40, 11 +; GFX9-NEXT: v_readlane_b32 s50, v40, 10 +; GFX9-NEXT: v_readlane_b32 s49, v40, 9 +; GFX9-NEXT: v_readlane_b32 s48, v40, 8 +; GFX9-NEXT: v_readlane_b32 s47, v40, 7 +; GFX9-NEXT: v_readlane_b32 s46, v40, 6 ; GFX9-NEXT: v_readlane_b32 s37, v40, 5 ; GFX9-NEXT: v_readlane_b32 s36, v40, 4 ; GFX9-NEXT: v_readlane_b32 s35, v40, 3 @@ -9191,59 +9159,27 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s35, 3 ; GFX10-NEXT: v_writelane_b32 v40, s36, 4 ; GFX10-NEXT: v_writelane_b32 v40, s37, 5 -; GFX10-NEXT: v_writelane_b32 v40, s38, 6 -; GFX10-NEXT: v_writelane_b32 v40, s39, 7 -; GFX10-NEXT: v_writelane_b32 v40, s40, 8 -; GFX10-NEXT: v_writelane_b32 v40, s41, 9 -; GFX10-NEXT: v_writelane_b32 v40, s42, 10 -; GFX10-NEXT: v_writelane_b32 v40, s43, 11 -; GFX10-NEXT: v_writelane_b32 v40, s44, 12 -; GFX10-NEXT: v_writelane_b32 v40, s45, 13 -; GFX10-NEXT: v_writelane_b32 v40, s46, 14 -; GFX10-NEXT: v_writelane_b32 v40, s47, 15 -; GFX10-NEXT: v_writelane_b32 v40, s48, 16 -; GFX10-NEXT: v_writelane_b32 v40, s49, 17 -; GFX10-NEXT: v_writelane_b32 v40, s50, 18 -; GFX10-NEXT: v_writelane_b32 v40, s51, 19 -; GFX10-NEXT: v_writelane_b32 v40, s52, 20 -; GFX10-NEXT: v_writelane_b32 v40, s53, 21 -; GFX10-NEXT: v_writelane_b32 v40, s54, 22 -; GFX10-NEXT: v_writelane_b32 v40, s55, 23 -; GFX10-NEXT: v_writelane_b32 v40, s56, 24 -; GFX10-NEXT: v_writelane_b32 v40, s57, 25 -; GFX10-NEXT: v_writelane_b32 v40, s58, 26 -; GFX10-NEXT: v_writelane_b32 v40, s59, 27 -; GFX10-NEXT: v_writelane_b32 v40, s60, 28 -; GFX10-NEXT: v_writelane_b32 v40, s61, 29 -; GFX10-NEXT: v_writelane_b32 v40, s62, 30 -; GFX10-NEXT: v_writelane_b32 v40, s63, 31 +; GFX10-NEXT: v_writelane_b32 v40, s46, 6 +; GFX10-NEXT: v_writelane_b32 v40, s47, 7 +; GFX10-NEXT: v_writelane_b32 v40, s48, 8 +; GFX10-NEXT: v_writelane_b32 v40, s49, 9 +; GFX10-NEXT: v_writelane_b32 v40, s50, 10 +; GFX10-NEXT: v_writelane_b32 v40, s51, 11 +; GFX10-NEXT: v_writelane_b32 v40, s52, 12 +; GFX10-NEXT: v_writelane_b32 v40, s53, 13 +; GFX10-NEXT: v_writelane_b32 v40, s62, 14 +; GFX10-NEXT: v_writelane_b32 v40, s63, 15 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s63, v40, 31 -; GFX10-NEXT: v_readlane_b32 s62, v40, 30 -; GFX10-NEXT: v_readlane_b32 s61, v40, 29 -; GFX10-NEXT: v_readlane_b32 s60, v40, 28 -; GFX10-NEXT: v_readlane_b32 s59, v40, 27 -; GFX10-NEXT: v_readlane_b32 s58, v40, 26 -; GFX10-NEXT: v_readlane_b32 s57, v40, 25 -; GFX10-NEXT: v_readlane_b32 s56, v40, 24 -; GFX10-NEXT: v_readlane_b32 s55, v40, 23 -; GFX10-NEXT: v_readlane_b32 s54, v40, 22 -; GFX10-NEXT: v_readlane_b32 s53, v40, 21 -; GFX10-NEXT: v_readlane_b32 s52, v40, 20 -; GFX10-NEXT: v_readlane_b32 s51, v40, 19 -; GFX10-NEXT: v_readlane_b32 s50, v40, 18 -; GFX10-NEXT: v_readlane_b32 s49, v40, 17 -; GFX10-NEXT: v_readlane_b32 s48, v40, 16 -; GFX10-NEXT: v_readlane_b32 s47, v40, 15 -; GFX10-NEXT: v_readlane_b32 s46, v40, 14 -; GFX10-NEXT: v_readlane_b32 s45, v40, 13 -; GFX10-NEXT: v_readlane_b32 s44, v40, 12 -; GFX10-NEXT: v_readlane_b32 s43, v40, 11 -; GFX10-NEXT: v_readlane_b32 s42, v40, 10 -; GFX10-NEXT: v_readlane_b32 s41, v40, 9 -; GFX10-NEXT: v_readlane_b32 s40, v40, 8 -; GFX10-NEXT: v_readlane_b32 s39, v40, 7 -; GFX10-NEXT: v_readlane_b32 s38, v40, 6 +; GFX10-NEXT: v_readlane_b32 s63, v40, 15 +; GFX10-NEXT: v_readlane_b32 s62, v40, 14 +; GFX10-NEXT: v_readlane_b32 s53, v40, 13 +; GFX10-NEXT: v_readlane_b32 s52, v40, 12 +; GFX10-NEXT: v_readlane_b32 s51, v40, 11 +; GFX10-NEXT: v_readlane_b32 s50, v40, 10 +; GFX10-NEXT: v_readlane_b32 s49, v40, 9 +; GFX10-NEXT: v_readlane_b32 s48, v40, 8 +; GFX10-NEXT: v_readlane_b32 s47, v40, 7 +; GFX10-NEXT: v_readlane_b32 s46, v40, 6 ; GFX10-NEXT: v_readlane_b32 s37, v40, 5 ; GFX10-NEXT: v_readlane_b32 s36, v40, 4 ; GFX10-NEXT: v_readlane_b32 s35, v40, 3 @@ -9279,61 +9215,29 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s35, 3 ; GFX11-NEXT: v_writelane_b32 v40, s36, 4 ; GFX11-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-NEXT: v_writelane_b32 v40, s38, 6 -; GFX11-NEXT: v_writelane_b32 v40, s39, 7 -; GFX11-NEXT: v_writelane_b32 v40, s40, 8 -; GFX11-NEXT: v_writelane_b32 v40, s41, 9 -; GFX11-NEXT: v_writelane_b32 v40, s42, 10 -; GFX11-NEXT: v_writelane_b32 v40, s43, 11 -; GFX11-NEXT: v_writelane_b32 v40, s44, 12 -; GFX11-NEXT: v_writelane_b32 v40, s45, 13 -; GFX11-NEXT: v_writelane_b32 v40, s46, 14 -; GFX11-NEXT: v_writelane_b32 v40, s47, 15 -; GFX11-NEXT: v_writelane_b32 v40, s48, 16 -; GFX11-NEXT: v_writelane_b32 v40, s49, 17 -; GFX11-NEXT: v_writelane_b32 v40, s50, 18 -; GFX11-NEXT: v_writelane_b32 v40, s51, 19 -; GFX11-NEXT: v_writelane_b32 v40, s52, 20 -; GFX11-NEXT: v_writelane_b32 v40, s53, 21 -; GFX11-NEXT: v_writelane_b32 v40, s54, 22 -; GFX11-NEXT: v_writelane_b32 v40, s55, 23 -; GFX11-NEXT: v_writelane_b32 v40, s56, 24 -; GFX11-NEXT: v_writelane_b32 v40, s57, 25 -; GFX11-NEXT: v_writelane_b32 v40, s58, 26 -; GFX11-NEXT: v_writelane_b32 v40, s59, 27 -; GFX11-NEXT: v_writelane_b32 v40, s60, 28 -; GFX11-NEXT: v_writelane_b32 v40, s61, 29 -; GFX11-NEXT: v_writelane_b32 v40, s62, 30 -; GFX11-NEXT: v_writelane_b32 v40, s63, 31 +; GFX11-NEXT: v_writelane_b32 v40, s46, 6 +; GFX11-NEXT: v_writelane_b32 v40, s47, 7 +; GFX11-NEXT: v_writelane_b32 v40, s48, 8 +; GFX11-NEXT: v_writelane_b32 v40, s49, 9 +; GFX11-NEXT: v_writelane_b32 v40, s50, 10 +; GFX11-NEXT: v_writelane_b32 v40, s51, 11 +; GFX11-NEXT: v_writelane_b32 v40, s52, 12 +; GFX11-NEXT: v_writelane_b32 v40, s53, 13 +; GFX11-NEXT: v_writelane_b32 v40, s62, 14 +; GFX11-NEXT: v_writelane_b32 v40, s63, 15 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s63, v40, 31 -; GFX11-NEXT: v_readlane_b32 s62, v40, 30 -; GFX11-NEXT: v_readlane_b32 s61, v40, 29 -; GFX11-NEXT: v_readlane_b32 s60, v40, 28 -; GFX11-NEXT: v_readlane_b32 s59, v40, 27 -; GFX11-NEXT: v_readlane_b32 s58, v40, 26 -; GFX11-NEXT: v_readlane_b32 s57, v40, 25 -; GFX11-NEXT: v_readlane_b32 s56, v40, 24 -; GFX11-NEXT: v_readlane_b32 s55, v40, 23 -; GFX11-NEXT: v_readlane_b32 s54, v40, 22 -; GFX11-NEXT: v_readlane_b32 s53, v40, 21 -; GFX11-NEXT: v_readlane_b32 s52, v40, 20 -; GFX11-NEXT: v_readlane_b32 s51, v40, 19 -; GFX11-NEXT: v_readlane_b32 s50, v40, 18 -; GFX11-NEXT: v_readlane_b32 s49, v40, 17 -; GFX11-NEXT: v_readlane_b32 s48, v40, 16 -; GFX11-NEXT: v_readlane_b32 s47, v40, 15 -; GFX11-NEXT: v_readlane_b32 s46, v40, 14 -; GFX11-NEXT: v_readlane_b32 s45, v40, 13 -; GFX11-NEXT: v_readlane_b32 s44, v40, 12 -; GFX11-NEXT: v_readlane_b32 s43, v40, 11 -; GFX11-NEXT: v_readlane_b32 s42, v40, 10 -; GFX11-NEXT: v_readlane_b32 s41, v40, 9 -; GFX11-NEXT: v_readlane_b32 s40, v40, 8 -; GFX11-NEXT: v_readlane_b32 s39, v40, 7 -; GFX11-NEXT: v_readlane_b32 s38, v40, 6 +; GFX11-NEXT: v_readlane_b32 s63, v40, 15 +; GFX11-NEXT: v_readlane_b32 s62, v40, 14 +; GFX11-NEXT: v_readlane_b32 s53, v40, 13 +; GFX11-NEXT: v_readlane_b32 s52, v40, 12 +; GFX11-NEXT: v_readlane_b32 s51, v40, 11 +; GFX11-NEXT: v_readlane_b32 s50, v40, 10 +; GFX11-NEXT: v_readlane_b32 s49, v40, 9 +; GFX11-NEXT: v_readlane_b32 s48, v40, 8 +; GFX11-NEXT: v_readlane_b32 s47, v40, 7 +; GFX11-NEXT: v_readlane_b32 s46, v40, 6 ; GFX11-NEXT: v_readlane_b32 s37, v40, 5 ; GFX11-NEXT: v_readlane_b32 s36, v40, 4 ; GFX11-NEXT: v_readlane_b32 s35, v40, 3 @@ -9369,61 +9273,29 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 7 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 9 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 10 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 11 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 13 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 14 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 15 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 17 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 18 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 19 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 20 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 21 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 22 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 23 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 24 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 25 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 26 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 27 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 28 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 29 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 30 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 31 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 9 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 11 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 13 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 14 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 15 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 31 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 30 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 29 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 28 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 27 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 26 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 25 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 24 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 23 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 22 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 21 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 20 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 19 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 18 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 17 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 15 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 14 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 13 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 11 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 10 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 9 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 8 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 7 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 15 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 14 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 13 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 12 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 11 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 10 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 3 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll index 15be44a335a1d..3c85914536f28 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll @@ -365,12 +365,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -388,8 +388,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -437,12 +437,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -458,9 +458,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -504,13 +504,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -526,8 +526,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -571,13 +571,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -593,8 +593,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -735,19 +735,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -760,11 +760,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -773,7 +773,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -785,12 +785,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -806,9 +806,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -867,13 +867,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -889,8 +889,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -944,13 +944,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -966,8 +966,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1585,12 +1585,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -1608,8 +1608,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -1657,12 +1657,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; ; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -1678,9 +1678,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1724,13 +1724,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -1746,8 +1746,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1791,13 +1791,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -1813,8 +1813,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1955,19 +1955,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -1980,11 +1980,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -1993,7 +1993,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -2005,12 +2005,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; ; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -2026,9 +2026,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2087,13 +2087,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -2109,8 +2109,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2164,13 +2164,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -2186,8 +2186,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2865,12 +2865,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -2888,8 +2888,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -2937,12 +2937,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -2958,9 +2958,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3004,13 +3004,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -3026,8 +3026,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3071,13 +3071,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -3093,8 +3093,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3235,19 +3235,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -3260,11 +3260,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -3273,7 +3273,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -3285,12 +3285,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -3306,9 +3306,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3367,13 +3367,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -3389,8 +3389,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3444,13 +3444,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -3466,8 +3466,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3641,12 +3641,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -3664,8 +3664,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -3713,12 +3713,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -3734,9 +3734,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3780,13 +3780,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -3802,8 +3802,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3847,13 +3847,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -3869,8 +3869,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4011,19 +4011,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -4036,11 +4036,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -4049,7 +4049,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -4061,12 +4061,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -4082,9 +4082,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4143,13 +4143,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -4165,8 +4165,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4220,13 +4220,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -4242,8 +4242,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4920,12 +4920,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -4943,8 +4943,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -4992,12 +4992,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; ; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -5013,9 +5013,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5059,13 +5059,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -5081,8 +5081,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5126,16 +5126,16 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 -; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 -; GFX1032-NEXT: s_mov_b32 s12, s8 -; GFX1032-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-NEXT: s_mov_b32 s13, s9 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_mov_b32 s12, s8 +; GFX1032-NEXT: s_add_u32 s8, s34, 44 +; GFX1032-NEXT: s_mov_b32 s13, s9 ; GFX1032-NEXT: s_addc_u32 s9, s35, 0 ; GFX1032-NEXT: s_getpc_b64 s[4:5] ; GFX1032-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 @@ -5148,8 +5148,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5316,19 +5316,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -5341,11 +5341,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB8_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -5354,7 +5354,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -5366,12 +5366,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; ; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -5387,9 +5387,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5448,13 +5448,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -5470,8 +5470,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5525,13 +5525,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -5547,8 +5547,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5747,14 +5747,14 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec ; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0 ; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, s1, v3 @@ -5763,16 +5763,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: s_cbranch_execz .LBB9_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[0:1] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX7LESS-NEXT: v_cvt_f64_u32_e32 v[1:2], s2 ; GFX7LESS-NEXT: v_or_b32_e32 v4, v0, v4 @@ -5785,11 +5785,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -5802,64 +5802,64 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB9_2 ; GFX7LESS-NEXT: .LBB9_3: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: s_mov_b64 s[0:1], exec -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, s0, 0 -; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_add_u32 s64, s64, s11 ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX9-NEXT: s_cbranch_execz .LBB9_3 ; GFX9-NEXT: ; %bb.1: ; GFX9-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-NEXT: v_cvt_f64_u32_e32 v[3:4], s0 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-NEXT: s_mov_b32 s42, s9 -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-NEXT: s_mov_b32 s50, s9 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, s1 -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: .LBB9_2: ; %atomicrmw.start @@ -5872,68 +5872,68 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v3, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v3, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB9_2 ; GFX9-NEXT: .LBB9_3: ; GFX9-NEXT: s_endpgm ; ; GFX1064-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[10:11], exec -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 ; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, s10, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, s11, v3 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB9_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-NEXT: s_bcnt1_i32_b64 s0, s[10:11] -; GFX1064-NEXT: s_mov_b32 s42, s9 +; GFX1064-NEXT: s_mov_b32 s50, s9 ; GFX1064-NEXT: v_cvt_f64_u32_e32 v[3:4], s0 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 ; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1 @@ -5950,69 +5950,69 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1064-NEXT: .LBB9_3: ; GFX1064-NEXT: s_endpgm ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s42, s9 +; GFX1032-NEXT: s_mov_b32 s50, s9 ; GFX1032-NEXT: s_mov_b32 s9, exec_lo -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB9_3 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-NEXT: s_bcnt1_i32_b32 s0, s9 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: v_cvt_f64_u32_e32 v[3:4], s0 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 ; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1 @@ -6029,37 +6029,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1032-NEXT: .LBB9_3: ; GFX1032-NEXT: s_endpgm @@ -6070,7 +6070,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-NEXT: s_mov_b64 s[10:11], exec ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, s10, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: s_mov_b64 s[0:1], exec ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -6079,16 +6079,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-NEXT: s_cbranch_execz .LBB9_3 ; GFX1164-NEXT: ; %bb.1: ; GFX1164-NEXT: s_bcnt1_i32_b64 s0, s[10:11] -; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX1164-NEXT: s_mov_b32 s42, s9 -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s50, s9 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -6111,18 +6111,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6130,8 +6130,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1164-NEXT: .LBB9_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6142,24 +6142,24 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1132-NEXT: s_mov_b32 s8, exec_lo ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_cbranch_execz .LBB9_3 ; GFX1132-NEXT: ; %bb.1: ; GFX1132-NEXT: s_bcnt1_i32_b32 s0, s8 -; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 ; GFX1132-NEXT: s_mov_b32 s33, s15 -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -6179,25 +6179,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1132-NEXT: .LBB9_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6206,14 +6206,14 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], exec ; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0 ; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, s1, v3 @@ -6222,16 +6222,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX7LESS-DPP-NEXT: ; %bb.1: ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s2, s[0:1] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX7LESS-DPP-NEXT: v_cvt_f64_u32_e32 v[1:2], s2 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v4, v0, v4 @@ -6244,11 +6244,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -6261,64 +6261,64 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX7LESS-DPP-NEXT: .LBB9_3: ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s66, -1 +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[0:1], exec -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000 ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s0, 0 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX9-DPP-NEXT: ; %bb.1: ; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: .LBB9_2: ; %atomicrmw.start @@ -6331,68 +6331,68 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 -; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[48:51], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v3, off, s[64:67], 0 +; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX9-DPP-NEXT: .LBB9_3: ; GFX9-DPP-NEXT: s_endpgm ; ; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], exec -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s10, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s11, v3 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s0, s[10:11] -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1064-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 @@ -6409,69 +6409,69 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1064-DPP-NEXT: .LBB9_3: ; GFX1064-DPP-NEXT: s_endpgm ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1032-DPP-NEXT: s_mov_b32 s9, exec_lo -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 ; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, s9 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 @@ -6488,37 +6488,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1032-DPP-NEXT: .LBB9_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -6529,7 +6529,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], exec ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s10, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -6538,16 +6538,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX1164-DPP-NEXT: ; %bb.1: ; GFX1164-DPP-NEXT: s_bcnt1_i32_b64 s0, s[10:11] -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-DPP-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164-DPP-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -6570,18 +6570,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6589,8 +6589,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1164-DPP-NEXT: .LBB9_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6601,24 +6601,24 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s8, exec_lo ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX1132-DPP-NEXT: ; %bb.1: ; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s0, s8 -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-DPP-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-DPP-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -6638,25 +6638,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1132-DPP-NEXT: .LBB9_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6669,19 +6669,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] @@ -6692,15 +6692,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -6725,21 +6725,21 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB10_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 +; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-NEXT: .LBB10_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -6752,44 +6752,44 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_4 ; GFX7LESS-NEXT: .LBB10_5: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s11 +; GFX9-NEXT: s_addc_u32 s65, s65, 0 ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_add_u32 s8, s36, 44 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -6798,17 +6798,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6833,11 +6833,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX9-NEXT: s_cbranch_execz .LBB10_5 ; GFX9-NEXT: ; %bb.3: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX9-NEXT: .LBB10_4: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -6848,53 +6848,53 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB10_4 ; GFX9-NEXT: .LBB10_5: ; GFX9-NEXT: s_endpgm ; ; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-NEXT: s_mov_b32 s42, s9 +; GFX1064-NEXT: s_mov_b32 s50, s9 ; GFX1064-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -6903,17 +6903,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 +; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6938,11 +6938,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1064-NEXT: s_cbranch_execz .LBB10_5 ; GFX1064-NEXT: ; %bb.3: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-NEXT: .LBB10_4: ; %atomicrmw.start ; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-NEXT: s_waitcnt vmcnt(0) @@ -6952,55 +6952,55 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1064-NEXT: .LBB10_5: ; GFX1064-NEXT: s_endpgm ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-NEXT: s_mov_b32 s42, s9 +; GFX1032-NEXT: s_mov_b32 s50, s9 ; GFX1032-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7009,17 +7009,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 +; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7038,16 +7038,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: s_cbranch_scc1 .LBB10_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1032-NEXT: s_cbranch_execz .LBB10_5 ; GFX1032-NEXT: ; %bb.3: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1032-NEXT: .LBB10_4: ; %atomicrmw.start ; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-NEXT: s_waitcnt vmcnt(0) @@ -7057,37 +7057,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1032-NEXT: .LBB10_5: ; GFX1032-NEXT: s_endpgm @@ -7095,11 +7095,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1164: ; %bb.0: ; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-NEXT: s_mov_b32 s42, s9 +; GFX1164-NEXT: s_mov_b32 s50, s9 ; GFX1164-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_getpc_b64 s[0:1] ; GFX1164-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1164-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7107,15 +7107,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-NEXT: s_mov_b32 s33, s10 ; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v41, 0 @@ -7143,11 +7143,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1164-NEXT: s_cbranch_execz .LBB10_5 ; GFX1164-NEXT: ; %bb.3: -; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-NEXT: .p2align 6 ; GFX1164-NEXT: .LBB10_4: ; %atomicrmw.start @@ -7164,18 +7164,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7183,8 +7183,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1164-NEXT: .LBB10_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7193,7 +7193,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1132: ; %bb.0: ; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-NEXT: s_getpc_b64 s[0:1] @@ -7202,9 +7202,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s13 @@ -7213,7 +7213,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v41, 0 @@ -7233,17 +7233,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-NEXT: s_cbranch_scc1 .LBB10_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1132-NEXT: s_cbranch_execz .LBB10_5 ; GFX1132-NEXT: ; %bb.3: -; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-NEXT: .p2align 6 ; GFX1132-NEXT: .LBB10_4: ; %atomicrmw.start @@ -7258,25 +7258,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1132-NEXT: .LBB10_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7285,22 +7285,22 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -7311,30 +7311,30 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v42, v0, v2 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1 -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-DPP-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[40:41] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -7347,44 +7347,44 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s54, -1 -; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11 -; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0 +; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s82, -1 +; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7393,17 +7393,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7450,74 +7450,74 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 -; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63 -; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24 -; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0 +; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] ; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], s[44:45] +; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], s[52:53] ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49] +; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX9-DPP-NEXT: .LBB10_3: ; GFX9-DPP-NEXT: s_endpgm ; ; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7526,17 +7526,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7581,10 +7581,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start ; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0) @@ -7594,55 +7594,55 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1064-DPP-NEXT: .LBB10_3: ; GFX1064-DPP-NEXT: s_endpgm ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7651,17 +7651,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7696,14 +7696,14 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1032-DPP-NEXT: .LBB10_2: ; %atomicrmw.start ; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0) @@ -7713,37 +7713,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1032-DPP-NEXT: .LBB10_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -7751,11 +7751,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1164-DPP: ; %bb.0: ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7763,15 +7763,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -7825,10 +7825,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-DPP-NEXT: .p2align 6 ; GFX1164-DPP-NEXT: .LBB10_2: ; %atomicrmw.start @@ -7845,18 +7845,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7864,8 +7864,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1164-DPP-NEXT: .LBB10_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7874,7 +7874,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1132-DPP: ; %bb.0: ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] @@ -7883,9 +7883,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 @@ -7894,7 +7894,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -7936,14 +7936,14 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-DPP-NEXT: .p2align 6 ; GFX1132-DPP-NEXT: .LBB10_2: ; %atomicrmw.start @@ -7958,25 +7958,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1132-DPP-NEXT: .LBB10_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8508,12 +8508,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -8531,8 +8531,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -8585,12 +8585,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; ; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -8606,9 +8606,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8655,13 +8655,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; ; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -8677,8 +8677,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8725,13 +8725,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -8747,8 +8747,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8922,19 +8922,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -8947,11 +8947,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB12_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -8962,7 +8962,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -8975,12 +8975,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; ; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -8996,9 +8996,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9074,13 +9074,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; ; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -9096,8 +9096,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9163,13 +9163,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -9185,8 +9185,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9941,12 +9941,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -9964,8 +9964,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -10018,12 +10018,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -10039,9 +10039,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10088,13 +10088,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -10110,8 +10110,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10158,13 +10158,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -10180,8 +10180,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10355,19 +10355,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -10380,11 +10380,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB14_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -10395,7 +10395,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -10408,12 +10408,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -10429,9 +10429,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10507,13 +10507,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -10529,8 +10529,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10596,13 +10596,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -10618,8 +10618,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10856,12 +10856,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -10879,8 +10879,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -10933,12 +10933,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -10954,9 +10954,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11003,13 +11003,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -11025,8 +11025,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11073,13 +11073,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -11095,8 +11095,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11270,19 +11270,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -11295,11 +11295,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB15_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -11310,7 +11310,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -11323,12 +11323,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -11344,9 +11344,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11422,13 +11422,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -11444,8 +11444,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11511,13 +11511,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -11533,8 +11533,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11771,13 +11771,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, 0 ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX7LESS-NEXT: s_mov_b32 s1, 0x43300000 @@ -11791,15 +11791,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: s_cbranch_execz .LBB16_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 @@ -11811,11 +11811,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -11828,40 +11828,40 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB16_2 ; GFX7LESS-NEXT: .LBB16_3: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s11 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX9-NEXT: v_mov_b32_e32 v4, 0xc3300000 ; GFX9-NEXT: s_mov_b32 s1, 0x43300000 @@ -11874,20 +11874,20 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-NEXT: s_cbranch_execz .LBB16_3 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-NEXT: .LBB16_2: ; %atomicrmw.start @@ -11900,48 +11900,48 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB16_2 ; GFX9-NEXT: .LBB16_3: ; GFX9-NEXT: s_endpgm ; ; GFX1064-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s64, s64, s11 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX1064-NEXT: s_mov_b32 s1, 0x43300000 ; GFX1064-NEXT: s_movk_i32 s32, 0x800 @@ -11953,19 +11953,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB16_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 -; GFX1064-NEXT: s_mov_b32 s42, s9 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_mov_b32 s50, s9 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_mov_b32_e32 v2, s1 ; GFX1064-NEXT: v_mov_b32_e32 v1, s0 @@ -11978,53 +11978,53 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1064-NEXT: .LBB16_3: ; GFX1064-NEXT: s_endpgm ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-NEXT: s_bcnt1_i32_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s1, 0x43300000 -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1] ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4] @@ -12033,18 +12033,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB16_3 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 -; GFX1032-NEXT: s_mov_b32 s42, s9 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_mov_b32 s50, s9 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_mov_b32_e32 v2, s1 ; GFX1032-NEXT: v_mov_b32_e32 v1, s0 @@ -12057,44 +12057,44 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1032-NEXT: .LBB16_3: ; GFX1032-NEXT: s_endpgm ; ; GFX1164-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp: ; GFX1164: ; %bb.0: -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0x43300000 @@ -12115,16 +12115,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1164-NEXT: s_cbranch_execz .LBB16_3 ; GFX1164-NEXT: ; %bb.1: -; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-NEXT: s_mov_b32 s33, s10 -; GFX1164-NEXT: s_mov_b32 s42, s9 -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s50, s9 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: v_mov_b32_e32 v2, s1 ; GFX1164-NEXT: v_mov_b32_e32 v1, s0 @@ -12145,18 +12145,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -12164,8 +12164,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1164-NEXT: .LBB16_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12173,12 +12173,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; ; GFX1132-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp: ; GFX1132: ; %bb.0: -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-NEXT: s_bcnt1_i32_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0 ; GFX1132-NEXT: v_mov_b32_e32 v0, 0x43300000 -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_clause 0x1 ; GFX1132-NEXT: scratch_store_b32 off, v0, off offset:20 ; GFX1132-NEXT: scratch_store_b32 off, v1, off offset:16 @@ -12193,15 +12193,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_cbranch_execz .LBB16_3 ; GFX1132-NEXT: ; %bb.1: -; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-NEXT: s_mov_b32 s33, s15 -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 @@ -12219,25 +12219,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1132-NEXT: .LBB16_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12246,13 +12246,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, 0 ; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX7LESS-DPP-NEXT: s_mov_b32 s1, 0x43300000 @@ -12266,15 +12266,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB16_3 ; GFX7LESS-DPP-NEXT: ; %bb.1: ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 @@ -12286,11 +12286,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -12303,40 +12303,40 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX7LESS-DPP-NEXT: .LBB16_3: ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s66, -1 +; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0xc3300000 ; GFX9-DPP-NEXT: s_mov_b32 s1, 0x43300000 @@ -12349,20 +12349,20 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB16_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-DPP-NEXT: .LBB16_2: ; %atomicrmw.start @@ -12375,48 +12375,48 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX9-DPP-NEXT: .LBB16_3: ; GFX9-DPP-NEXT: s_endpgm ; ; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX1064-DPP-NEXT: s_mov_b32 s1, 0x43300000 ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 @@ -12428,19 +12428,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB16_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -12453,53 +12453,53 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1064-DPP-NEXT: .LBB16_3: ; GFX1064-DPP-NEXT: s_endpgm ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s1, 0x43300000 -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1] ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4] @@ -12508,18 +12508,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB16_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -12532,44 +12532,44 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1032-DPP-NEXT: .LBB16_3: ; GFX1032-DPP-NEXT: s_endpgm ; ; GFX1164-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp: ; GFX1164-DPP: ; %bb.0: -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000 @@ -12590,16 +12590,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB16_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -12620,18 +12620,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -12639,8 +12639,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1164-DPP-NEXT: .LBB16_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12648,12 +12648,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; ; GFX1132-DPP-LABEL: global_atomic_fadd_double_uni_address_uni_value_default_scope_strictfp: ; GFX1132-DPP: ; %bb.0: -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo ; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000 -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_clause 0x1 ; GFX1132-DPP-NEXT: scratch_store_b32 off, v0, off offset:20 ; GFX1132-DPP-NEXT: scratch_store_b32 off, v1, off offset:16 @@ -12668,15 +12668,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB16_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -12694,25 +12694,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1132-DPP-NEXT: .LBB16_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12725,19 +12725,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] @@ -12748,15 +12748,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -12781,21 +12781,21 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB17_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 +; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-NEXT: .LBB17_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -12808,44 +12808,44 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB17_4 ; GFX7LESS-NEXT: .LBB17_5: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s11 +; GFX9-NEXT: s_addc_u32 s65, s65, 0 ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_add_u32 s8, s36, 44 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -12854,17 +12854,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -12889,11 +12889,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX9-NEXT: s_cbranch_execz .LBB17_5 ; GFX9-NEXT: ; %bb.3: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX9-NEXT: .LBB17_4: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -12904,53 +12904,53 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB17_4 ; GFX9-NEXT: .LBB17_5: ; GFX9-NEXT: s_endpgm ; ; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-NEXT: s_mov_b32 s42, s9 +; GFX1064-NEXT: s_mov_b32 s50, s9 ; GFX1064-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -12959,17 +12959,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 +; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -12994,11 +12994,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1064-NEXT: s_cbranch_execz .LBB17_5 ; GFX1064-NEXT: ; %bb.3: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-NEXT: .LBB17_4: ; %atomicrmw.start ; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-NEXT: s_waitcnt vmcnt(0) @@ -13008,55 +13008,55 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1064-NEXT: .LBB17_5: ; GFX1064-NEXT: s_endpgm ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-NEXT: s_mov_b32 s42, s9 +; GFX1032-NEXT: s_mov_b32 s50, s9 ; GFX1032-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -13065,17 +13065,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 +; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -13094,16 +13094,16 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-NEXT: s_cbranch_scc1 .LBB17_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1032-NEXT: s_cbranch_execz .LBB17_5 ; GFX1032-NEXT: ; %bb.3: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1032-NEXT: .LBB17_4: ; %atomicrmw.start ; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-NEXT: s_waitcnt vmcnt(0) @@ -13113,37 +13113,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1032-NEXT: .LBB17_5: ; GFX1032-NEXT: s_endpgm @@ -13151,11 +13151,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX1164: ; %bb.0: ; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-NEXT: s_mov_b32 s42, s9 +; GFX1164-NEXT: s_mov_b32 s50, s9 ; GFX1164-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_getpc_b64 s[0:1] ; GFX1164-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1164-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -13163,15 +13163,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-NEXT: s_mov_b32 s33, s10 ; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v41, 0 @@ -13199,11 +13199,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1164-NEXT: s_cbranch_execz .LBB17_5 ; GFX1164-NEXT: ; %bb.3: -; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-NEXT: .p2align 6 ; GFX1164-NEXT: .LBB17_4: ; %atomicrmw.start @@ -13220,18 +13220,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -13239,8 +13239,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1164-NEXT: .LBB17_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -13249,7 +13249,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX1132: ; %bb.0: ; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-NEXT: s_getpc_b64 s[0:1] @@ -13258,9 +13258,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s13 @@ -13269,7 +13269,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v41, 0 @@ -13289,17 +13289,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-NEXT: s_cbranch_scc1 .LBB17_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1132-NEXT: s_cbranch_execz .LBB17_5 ; GFX1132-NEXT: ; %bb.3: -; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-NEXT: .p2align 6 ; GFX1132-NEXT: .LBB17_4: ; %atomicrmw.start @@ -13314,25 +13314,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1132-NEXT: .LBB17_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -13341,22 +13341,22 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -13367,30 +13367,30 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v42, v0, v2 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1 -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-DPP-NEXT: .LBB17_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[40:41] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -13403,44 +13403,44 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB17_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s54, -1 -; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11 -; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0 +; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s82, -1 +; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -13449,17 +13449,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -13506,74 +13506,74 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 -; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63 -; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24 -; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0 +; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] ; GFX9-DPP-NEXT: .LBB17_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], s[44:45] +; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], s[52:53] ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49] +; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX9-DPP-NEXT: .LBB17_3: ; GFX9-DPP-NEXT: s_endpgm ; ; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -13582,17 +13582,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -13637,10 +13637,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB17_2: ; %atomicrmw.start ; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0) @@ -13650,55 +13650,55 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1064-DPP-NEXT: .LBB17_3: ; GFX1064-DPP-NEXT: s_endpgm ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -13707,17 +13707,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -13752,14 +13752,14 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1032-DPP-NEXT: .LBB17_2: ; %atomicrmw.start ; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0) @@ -13769,37 +13769,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1032-DPP-NEXT: .LBB17_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -13807,11 +13807,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX1164-DPP: ; %bb.0: ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -13819,15 +13819,15 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -13881,10 +13881,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-DPP-NEXT: .p2align 6 ; GFX1164-DPP-NEXT: .LBB17_2: ; %atomicrmw.start @@ -13901,18 +13901,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -13920,8 +13920,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1164-DPP-NEXT: .LBB17_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -13930,7 +13930,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX1132-DPP: ; %bb.0: ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] @@ -13939,9 +13939,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 @@ -13950,7 +13950,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -13992,14 +13992,14 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-DPP-NEXT: .p2align 6 ; GFX1132-DPP-NEXT: .LBB17_2: ; %atomicrmw.start @@ -14014,25 +14014,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1132-DPP-NEXT: .LBB17_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll index a4410bb9ed2d0..cc9c310e5c059 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll @@ -273,12 +273,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -296,8 +296,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -349,12 +349,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -370,9 +370,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -420,13 +420,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -442,8 +442,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -477,13 +477,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -499,8 +499,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -635,19 +635,19 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -660,11 +660,11 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 ; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start @@ -675,7 +675,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -687,12 +687,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -708,9 +708,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -778,13 +778,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -800,8 +800,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -851,13 +851,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -873,8 +873,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1312,12 +1312,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -1335,8 +1335,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -1388,12 +1388,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; ; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -1409,9 +1409,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1459,13 +1459,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; ; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -1481,8 +1481,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1516,13 +1516,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; ; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -1538,8 +1538,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1674,19 +1674,19 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -1699,11 +1699,11 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 ; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start @@ -1714,7 +1714,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -1726,12 +1726,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; ; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -1747,9 +1747,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1817,13 +1817,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; ; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -1839,8 +1839,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1890,13 +1890,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; ; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -1912,8 +1912,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2351,12 +2351,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -2374,8 +2374,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -2427,12 +2427,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; ; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -2448,9 +2448,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2498,13 +2498,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; ; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -2520,8 +2520,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2555,13 +2555,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; ; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -2577,8 +2577,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2713,19 +2713,19 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -2738,11 +2738,11 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 ; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start @@ -2753,7 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -2765,12 +2765,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; ; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -2786,9 +2786,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2856,13 +2856,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; ; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -2878,8 +2878,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2929,13 +2929,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; ; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -2951,8 +2951,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3135,13 +3135,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 ; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 @@ -3149,15 +3149,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: s_cbranch_execz .LBB6_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 @@ -3169,8 +3169,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], 4.0 @@ -3178,8 +3178,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 @@ -3188,59 +3188,59 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB6_2 ; GFX7LESS-NEXT: .LBB6_3: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_add_u32 s64, s64, s11 ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-NEXT: s_cbranch_execz .LBB6_3 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-NEXT: .LBB6_2: ; %atomicrmw.start @@ -3253,36 +3253,36 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB6_2 ; GFX9-NEXT: .LBB6_3: ; GFX9-NEXT: s_endpgm @@ -3290,32 +3290,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1064-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: ; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 ; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_add_u32 s64, s64, s11 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB6_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 -; GFX1064-NEXT: s_mov_b32 s42, s9 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_mov_b32 s50, s9 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_mov_b32_e32 v2, s1 ; GFX1064-NEXT: v_mov_b32_e32 v1, s0 @@ -3328,38 +3328,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1064-NEXT: .LBB6_3: ; GFX1064-NEXT: s_endpgm @@ -3367,31 +3367,31 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB6_3 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 -; GFX1032-NEXT: s_mov_b32 s42, s9 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_mov_b32 s50, s9 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_mov_b32_e32 v2, s1 ; GFX1032-NEXT: v_mov_b32_e32 v1, s0 @@ -3404,38 +3404,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1032-NEXT: .LBB6_3: ; GFX1032-NEXT: s_endpgm @@ -3444,7 +3444,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164: ; %bb.0: ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: s_mov_b64 s[0:1], exec ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -3452,16 +3452,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1164-NEXT: s_cbranch_execz .LBB6_3 ; GFX1164-NEXT: ; %bb.1: -; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-NEXT: s_mov_b32 s33, s10 -; GFX1164-NEXT: s_mov_b32 s42, s9 -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s50, s9 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: v_mov_b32_e32 v2, s1 ; GFX1164-NEXT: v_mov_b32_e32 v1, s0 @@ -3483,18 +3483,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 ; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -3502,8 +3502,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1164-NEXT: .LBB6_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -3513,22 +3513,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1132: ; %bb.0: ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_cbranch_execz .LBB6_3 ; GFX1132-NEXT: ; %bb.1: -; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-NEXT: s_mov_b32 s33, s15 -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 @@ -3547,26 +3547,26 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 ; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off -; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1132-NEXT: .LBB6_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -3575,13 +3575,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 ; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 @@ -3589,15 +3589,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX7LESS-DPP-NEXT: ; %bb.1: ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 @@ -3609,8 +3609,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], 4.0 @@ -3618,8 +3618,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 @@ -3628,59 +3628,59 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX7LESS-DPP-NEXT: .LBB6_3: ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s66, -1 +; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000 ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-DPP-NEXT: .LBB6_2: ; %atomicrmw.start @@ -3693,36 +3693,36 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX9-DPP-NEXT: .LBB6_3: ; GFX9-DPP-NEXT: s_endpgm @@ -3730,32 +3730,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: ; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 ; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -3768,38 +3768,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1064-DPP-NEXT: .LBB6_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -3807,31 +3807,31 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: ; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -3844,38 +3844,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1032-DPP-NEXT: .LBB6_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -3884,7 +3884,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-DPP: ; %bb.0: ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -3892,16 +3892,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -3923,18 +3923,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -3942,8 +3942,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1164-DPP-NEXT: .LBB6_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -3953,22 +3953,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1132-DPP: ; %bb.0: ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -3987,26 +3987,26 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1132-DPP-NEXT: .LBB6_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -4019,19 +4019,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] @@ -4042,15 +4042,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -4077,19 +4077,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB7_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 +; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX7LESS-NEXT: .LBB7_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[41:42] @@ -4097,8 +4097,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 ; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 @@ -4107,43 +4107,43 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB7_4 ; GFX7LESS-NEXT: .LBB7_5: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s11 +; GFX9-NEXT: s_addc_u32 s65, s65, 0 ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_add_u32 s8, s36, 44 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -4152,17 +4152,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4189,12 +4189,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX9-NEXT: s_cbranch_execz .LBB7_5 ; GFX9-NEXT: ; %bb.3: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] +; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX9-NEXT: .LBB7_4: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -4205,54 +4205,54 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB7_4 ; GFX9-NEXT: .LBB7_5: ; GFX9-NEXT: s_endpgm ; ; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-NEXT: s_mov_b32 s42, s9 +; GFX1064-NEXT: s_mov_b32 s50, s9 ; GFX1064-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -4261,17 +4261,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 +; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4298,12 +4298,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1064-NEXT: s_cbranch_execz .LBB7_5 ; GFX1064-NEXT: ; %bb.3: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] +; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX1064-NEXT: .LBB7_4: ; %atomicrmw.start ; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-NEXT: s_waitcnt vmcnt(0) @@ -4313,56 +4313,56 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v4, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v4, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1064-NEXT: .LBB7_5: ; GFX1064-NEXT: s_endpgm ; ; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-NEXT: s_mov_b32 s42, s9 +; GFX1032-NEXT: s_mov_b32 s50, s9 ; GFX1032-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -4371,17 +4371,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 +; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4402,17 +4402,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-NEXT: s_cbranch_scc1 .LBB7_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1032-NEXT: s_cbranch_execz .LBB7_5 ; GFX1032-NEXT: ; %bb.3: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] +; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX1032-NEXT: .LBB7_4: ; %atomicrmw.start ; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-NEXT: s_waitcnt vmcnt(0) @@ -4422,38 +4422,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v4, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1032-NEXT: .LBB7_5: ; GFX1032-NEXT: s_endpgm @@ -4461,11 +4461,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX1164: ; %bb.0: ; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-NEXT: s_mov_b32 s42, s9 +; GFX1164-NEXT: s_mov_b32 s50, s9 ; GFX1164-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_getpc_b64 s[0:1] ; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -4473,15 +4473,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-NEXT: s_mov_b32 s33, s10 ; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v2, 0 @@ -4511,12 +4511,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1164-NEXT: s_cbranch_execz .LBB7_5 ; GFX1164-NEXT: ; %bb.3: -; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[44:45] +; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-NEXT: .p2align 6 ; GFX1164-NEXT: .LBB7_4: ; %atomicrmw.start @@ -4530,15 +4530,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX1164-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] ; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off @@ -4553,8 +4553,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1164-NEXT: .LBB7_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -4563,7 +4563,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX1132: ; %bb.0: ; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-NEXT: s_getpc_b64 s[0:1] @@ -4572,9 +4572,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s13 @@ -4583,7 +4583,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v2, 0 @@ -4606,19 +4606,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-NEXT: s_cbranch_scc1 .LBB7_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1132-NEXT: s_cbranch_execz .LBB7_5 ; GFX1132-NEXT: ; %bb.3: -; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[44:45] +; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[52:53] ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-NEXT: .p2align 6 ; GFX1132-NEXT: .LBB7_4: ; %atomicrmw.start @@ -4632,16 +4632,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX1132-NEXT: v_mov_b32_e32 v31, v40 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX1132-NEXT: v_mov_b32_e32 v3, s45 +; GFX1132-NEXT: v_mov_b32_e32 v3, s53 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: v_mov_b32_e32 v6, 0 -; GFX1132-NEXT: v_mov_b32_e32 v2, s44 +; GFX1132-NEXT: v_mov_b32_e32 v2, s52 ; GFX1132-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] ; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off ; GFX1132-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 8 @@ -4653,8 +4653,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1132-NEXT: .LBB7_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -4663,22 +4663,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -4689,34 +4689,34 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1] ; GFX7LESS-DPP-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 @@ -4725,43 +4725,43 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB7_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s54, -1 -; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11 -; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0 +; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s82, -1 +; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -4770,17 +4770,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4834,20 +4834,20 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 -; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63 -; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24 -; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0 +; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] ; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[44:45], s[44:45] +; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53] ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2] ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 @@ -4856,54 +4856,54 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49] +; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX9-DPP-NEXT: .LBB7_3: ; GFX9-DPP-NEXT: s_endpgm ; ; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -4912,17 +4912,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4975,10 +4975,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB7_2: ; %atomicrmw.start ; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42] @@ -4989,56 +4989,56 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1064-DPP-NEXT: .LBB7_3: ; GFX1064-DPP-NEXT: s_endpgm ; ; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -5047,17 +5047,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5098,15 +5098,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1032-DPP-NEXT: .LBB7_2: ; %atomicrmw.start ; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0) @@ -5116,38 +5116,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42] -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1032-DPP-NEXT: .LBB7_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -5155,11 +5155,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX1164-DPP: ; %bb.0: ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -5167,15 +5167,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5239,11 +5239,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-DPP-NEXT: .p2align 6 ; GFX1164-DPP-NEXT: .LBB7_2: ; %atomicrmw.start @@ -5261,18 +5261,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42] ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -5280,8 +5280,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1164-DPP-NEXT: .LBB7_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -5290,7 +5290,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX1132-DPP: ; %bb.0: ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] @@ -5299,9 +5299,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 @@ -5310,7 +5310,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -5356,16 +5356,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-DPP-NEXT: .p2align 6 ; GFX1132-DPP-NEXT: .LBB7_2: ; %atomicrmw.start @@ -5381,26 +5381,26 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42] ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1132-DPP-NEXT: .LBB7_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -5750,12 +5750,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -5773,8 +5773,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -5831,12 +5831,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; ; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -5852,9 +5852,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5905,13 +5905,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; ; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -5927,8 +5927,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5964,13 +5964,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; ; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -5986,8 +5986,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6162,19 +6162,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -6187,11 +6187,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] ; GFX7LESS-DPP-NEXT: .LBB9_1: ; %atomicrmw.start @@ -6204,7 +6204,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -6217,12 +6217,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; ; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -6238,9 +6238,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6325,13 +6325,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; ; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -6347,8 +6347,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6409,13 +6409,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; ; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -6431,8 +6431,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6683,13 +6683,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 ; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 @@ -6697,15 +6697,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: s_cbranch_execz .LBB10_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 @@ -6717,8 +6717,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], 4.0 @@ -6726,8 +6726,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 @@ -6736,59 +6736,59 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_2 ; GFX7LESS-NEXT: .LBB10_3: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_add_u32 s64, s64, s11 ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-NEXT: s_cbranch_execz .LBB10_3 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-NEXT: .LBB10_2: ; %atomicrmw.start @@ -6801,36 +6801,36 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB10_2 ; GFX9-NEXT: .LBB10_3: ; GFX9-NEXT: s_endpgm @@ -6838,32 +6838,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1064-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: ; GFX1064: ; %bb.0: ; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 ; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_add_u32 s64, s64, s11 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB10_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 -; GFX1064-NEXT: s_mov_b32 s42, s9 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_mov_b32 s50, s9 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_mov_b32_e32 v2, s1 ; GFX1064-NEXT: v_mov_b32_e32 v1, s0 @@ -6876,38 +6876,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1064-NEXT: .LBB10_3: ; GFX1064-NEXT: s_endpgm @@ -6915,31 +6915,31 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1032-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: ; GFX1032: ; %bb.0: ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB10_3 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 -; GFX1032-NEXT: s_mov_b32 s42, s9 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_mov_b32 s50, s9 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_mov_b32_e32 v2, s1 ; GFX1032-NEXT: v_mov_b32_e32 v1, s0 @@ -6952,38 +6952,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1032-NEXT: .LBB10_3: ; GFX1032-NEXT: s_endpgm @@ -6992,7 +6992,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164: ; %bb.0: ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: s_mov_b64 s[0:1], exec ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -7000,16 +7000,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1164-NEXT: s_cbranch_execz .LBB10_3 ; GFX1164-NEXT: ; %bb.1: -; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-NEXT: s_mov_b32 s33, s10 -; GFX1164-NEXT: s_mov_b32 s42, s9 -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s50, s9 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: v_mov_b32_e32 v2, s1 ; GFX1164-NEXT: v_mov_b32_e32 v1, s0 @@ -7031,18 +7031,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 ; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7050,8 +7050,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1164-NEXT: .LBB10_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7061,22 +7061,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1132: ; %bb.0: ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_cbranch_execz .LBB10_3 ; GFX1132-NEXT: ; %bb.1: -; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-NEXT: s_mov_b32 s33, s15 -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 @@ -7095,26 +7095,26 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 ; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off -; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1132-NEXT: .LBB10_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7123,13 +7123,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 ; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 @@ -7137,15 +7137,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX7LESS-DPP-NEXT: ; %bb.1: ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 @@ -7157,8 +7157,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], 4.0 @@ -7166,8 +7166,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 @@ -7176,59 +7176,59 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX7LESS-DPP-NEXT: .LBB10_3: ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s66, -1 +; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000 ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start @@ -7241,36 +7241,36 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX9-DPP-NEXT: .LBB10_3: ; GFX9-DPP-NEXT: s_endpgm @@ -7278,32 +7278,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: ; GFX1064-DPP: ; %bb.0: ; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 ; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -7316,38 +7316,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1064-DPP-NEXT: .LBB10_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -7355,31 +7355,31 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_uni_value_default_scope_unsafe: ; GFX1032-DPP: ; %bb.0: ; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -7392,38 +7392,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1032-DPP-NEXT: .LBB10_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -7432,7 +7432,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-DPP: ; %bb.0: ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -7440,16 +7440,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -7471,18 +7471,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7490,8 +7490,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1164-DPP-NEXT: .LBB10_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7501,22 +7501,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1132-DPP: ; %bb.0: ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -7535,26 +7535,26 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1132-DPP-NEXT: .LBB10_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7567,19 +7567,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] @@ -7590,15 +7590,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -7625,19 +7625,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB11_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 +; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX7LESS-NEXT: .LBB11_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[41:42] @@ -7645,8 +7645,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 ; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 @@ -7655,43 +7655,43 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB11_4 ; GFX7LESS-NEXT: .LBB11_5: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s11 +; GFX9-NEXT: s_addc_u32 s65, s65, 0 ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_add_u32 s8, s36, 44 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -7700,17 +7700,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7737,12 +7737,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX9-NEXT: s_cbranch_execz .LBB11_5 ; GFX9-NEXT: ; %bb.3: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] +; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX9-NEXT: .LBB11_4: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -7753,54 +7753,54 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB11_4 ; GFX9-NEXT: .LBB11_5: ; GFX9-NEXT: s_endpgm ; ; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-NEXT: s_mov_b32 s42, s9 +; GFX1064-NEXT: s_mov_b32 s50, s9 ; GFX1064-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -7809,17 +7809,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 +; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7846,12 +7846,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1064-NEXT: s_cbranch_execz .LBB11_5 ; GFX1064-NEXT: ; %bb.3: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] +; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX1064-NEXT: .LBB11_4: ; %atomicrmw.start ; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-NEXT: s_waitcnt vmcnt(0) @@ -7861,56 +7861,56 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v4, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v4, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1064-NEXT: .LBB11_5: ; GFX1064-NEXT: s_endpgm ; ; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-NEXT: s_mov_b32 s42, s9 +; GFX1032-NEXT: s_mov_b32 s50, s9 ; GFX1032-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -7919,17 +7919,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 +; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7950,17 +7950,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-NEXT: s_cbranch_scc1 .LBB11_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1032-NEXT: s_cbranch_execz .LBB11_5 ; GFX1032-NEXT: ; %bb.3: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] +; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX1032-NEXT: .LBB11_4: ; %atomicrmw.start ; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-NEXT: s_waitcnt vmcnt(0) @@ -7970,38 +7970,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v4, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1032-NEXT: .LBB11_5: ; GFX1032-NEXT: s_endpgm @@ -8009,11 +8009,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX1164: ; %bb.0: ; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-NEXT: s_mov_b32 s42, s9 +; GFX1164-NEXT: s_mov_b32 s50, s9 ; GFX1164-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_getpc_b64 s[0:1] ; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -8021,15 +8021,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-NEXT: s_mov_b32 s33, s10 ; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v2, 0 @@ -8059,12 +8059,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1164-NEXT: s_cbranch_execz .LBB11_5 ; GFX1164-NEXT: ; %bb.3: -; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[44:45] +; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-NEXT: .p2align 6 ; GFX1164-NEXT: .LBB11_4: ; %atomicrmw.start @@ -8078,15 +8078,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX1164-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] ; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off @@ -8101,8 +8101,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1164-NEXT: .LBB11_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8111,7 +8111,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX1132: ; %bb.0: ; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-NEXT: s_getpc_b64 s[0:1] @@ -8120,9 +8120,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s13 @@ -8131,7 +8131,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v2, 0 @@ -8154,19 +8154,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-NEXT: s_cbranch_scc1 .LBB11_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1132-NEXT: s_cbranch_execz .LBB11_5 ; GFX1132-NEXT: ; %bb.3: -; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[44:45] +; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[52:53] ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-NEXT: .p2align 6 ; GFX1132-NEXT: .LBB11_4: ; %atomicrmw.start @@ -8180,16 +8180,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX1132-NEXT: v_mov_b32_e32 v31, v40 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX1132-NEXT: v_mov_b32_e32 v3, s45 +; GFX1132-NEXT: v_mov_b32_e32 v3, s53 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: v_mov_b32_e32 v6, 0 -; GFX1132-NEXT: v_mov_b32_e32 v2, s44 +; GFX1132-NEXT: v_mov_b32_e32 v2, s52 ; GFX1132-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] ; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off ; GFX1132-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 8 @@ -8201,8 +8201,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1132-NEXT: .LBB11_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8211,22 +8211,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -8237,34 +8237,34 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1] ; GFX7LESS-DPP-NEXT: .LBB11_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 @@ -8273,43 +8273,43 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB11_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s54, -1 -; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11 -; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0 +; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s82, -1 +; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -8318,17 +8318,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8382,20 +8382,20 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 -; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63 -; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24 -; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0 +; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] ; GFX9-DPP-NEXT: .LBB11_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[44:45], s[44:45] +; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53] ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2] ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 @@ -8404,54 +8404,54 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49] +; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX9-DPP-NEXT: .LBB11_3: ; GFX9-DPP-NEXT: s_endpgm ; ; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -8460,17 +8460,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8523,10 +8523,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB11_2: ; %atomicrmw.start ; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42] @@ -8537,56 +8537,56 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1064-DPP-NEXT: .LBB11_3: ; GFX1064-DPP-NEXT: s_endpgm ; ; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -8595,17 +8595,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8646,15 +8646,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1032-DPP-NEXT: .LBB11_2: ; %atomicrmw.start ; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0) @@ -8664,38 +8664,38 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42] -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1032-DPP-NEXT: .LBB11_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -8703,11 +8703,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX1164-DPP: ; %bb.0: ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -8715,15 +8715,15 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -8787,11 +8787,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-DPP-NEXT: .p2align 6 ; GFX1164-DPP-NEXT: .LBB11_2: ; %atomicrmw.start @@ -8809,18 +8809,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42] ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -8828,8 +8828,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1164-DPP-NEXT: .LBB11_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8838,7 +8838,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX1132-DPP: ; %bb.0: ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] @@ -8847,9 +8847,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 @@ -8858,7 +8858,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -8904,16 +8904,16 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-DPP-NEXT: .p2align 6 ; GFX1132-DPP-NEXT: .LBB11_2: ; %atomicrmw.start @@ -8929,26 +8929,26 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-DPP-NEXT: v_max_f64 v[3:4], v[3:4], v[41:42] ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1132-DPP-NEXT: .LBB11_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll index 68d7dcc60506c..81a16df17c728 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll @@ -273,12 +273,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -296,8 +296,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -349,12 +349,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -370,9 +370,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -420,13 +420,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -442,8 +442,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -477,13 +477,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -499,8 +499,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -635,19 +635,19 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -660,11 +660,11 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 ; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start @@ -675,7 +675,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -687,12 +687,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -708,9 +708,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -778,13 +778,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -800,8 +800,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -851,13 +851,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -873,8 +873,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1312,12 +1312,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -1335,8 +1335,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -1388,12 +1388,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; ; GFX9-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -1409,9 +1409,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1459,13 +1459,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; ; GFX1064-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -1481,8 +1481,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1516,13 +1516,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; ; GFX1032-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -1538,8 +1538,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1674,19 +1674,19 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -1699,11 +1699,11 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 ; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start @@ -1714,7 +1714,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -1726,12 +1726,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; ; GFX9-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -1747,9 +1747,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1817,13 +1817,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; ; GFX1064-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -1839,8 +1839,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1890,13 +1890,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; ; GFX1032-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -1912,8 +1912,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2351,12 +2351,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -2374,8 +2374,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -2427,12 +2427,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; ; GFX9-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -2448,9 +2448,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2498,13 +2498,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; ; GFX1064-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -2520,8 +2520,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2555,13 +2555,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; ; GFX1032-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -2577,8 +2577,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2713,19 +2713,19 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX7LESS-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -2738,11 +2738,11 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 ; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start @@ -2753,7 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -2765,12 +2765,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; ; GFX9-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -2786,9 +2786,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2856,13 +2856,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; ; GFX1064-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -2878,8 +2878,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2929,13 +2929,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; ; GFX1032-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -2951,8 +2951,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3135,13 +3135,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 ; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 @@ -3149,15 +3149,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: s_cbranch_execz .LBB6_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 @@ -3169,8 +3169,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], 4.0 @@ -3178,8 +3178,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 @@ -3188,59 +3188,59 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB6_2 ; GFX7LESS-NEXT: .LBB6_3: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_add_u32 s64, s64, s11 ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-NEXT: s_cbranch_execz .LBB6_3 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-NEXT: .LBB6_2: ; %atomicrmw.start @@ -3253,36 +3253,36 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB6_2 ; GFX9-NEXT: .LBB6_3: ; GFX9-NEXT: s_endpgm @@ -3290,32 +3290,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1064-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: ; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 ; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_add_u32 s64, s64, s11 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB6_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 -; GFX1064-NEXT: s_mov_b32 s42, s9 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_mov_b32 s50, s9 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_mov_b32_e32 v2, s1 ; GFX1064-NEXT: v_mov_b32_e32 v1, s0 @@ -3328,38 +3328,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1064-NEXT: .LBB6_3: ; GFX1064-NEXT: s_endpgm @@ -3367,31 +3367,31 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1032-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB6_3 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 -; GFX1032-NEXT: s_mov_b32 s42, s9 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_mov_b32 s50, s9 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_mov_b32_e32 v2, s1 ; GFX1032-NEXT: v_mov_b32_e32 v1, s0 @@ -3404,38 +3404,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1032-NEXT: .LBB6_3: ; GFX1032-NEXT: s_endpgm @@ -3444,7 +3444,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164: ; %bb.0: ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: s_mov_b64 s[0:1], exec ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -3452,16 +3452,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1164-NEXT: s_cbranch_execz .LBB6_3 ; GFX1164-NEXT: ; %bb.1: -; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-NEXT: s_mov_b32 s33, s10 -; GFX1164-NEXT: s_mov_b32 s42, s9 -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s50, s9 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: v_mov_b32_e32 v2, s1 ; GFX1164-NEXT: v_mov_b32_e32 v1, s0 @@ -3483,18 +3483,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 ; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -3502,8 +3502,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1164-NEXT: .LBB6_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -3513,22 +3513,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1132: ; %bb.0: ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_cbranch_execz .LBB6_3 ; GFX1132-NEXT: ; %bb.1: -; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-NEXT: s_mov_b32 s33, s15 -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 @@ -3547,26 +3547,26 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 ; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off -; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1132-NEXT: .LBB6_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -3575,13 +3575,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 ; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 @@ -3589,15 +3589,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX7LESS-DPP-NEXT: ; %bb.1: ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 @@ -3609,8 +3609,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[2:3], 4.0 @@ -3618,8 +3618,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 @@ -3628,59 +3628,59 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX7LESS-DPP-NEXT: .LBB6_3: ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s66, -1 +; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000 ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-DPP-NEXT: .LBB6_2: ; %atomicrmw.start @@ -3693,36 +3693,36 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX9-DPP-NEXT: .LBB6_3: ; GFX9-DPP-NEXT: s_endpgm @@ -3730,32 +3730,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: ; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 ; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -3768,38 +3768,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1064-DPP-NEXT: .LBB6_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -3807,31 +3807,31 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: ; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -3844,38 +3844,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1032-DPP-NEXT: .LBB6_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -3884,7 +3884,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-DPP: ; %bb.0: ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -3892,16 +3892,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -3923,18 +3923,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -3942,8 +3942,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1164-DPP-NEXT: .LBB6_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -3953,22 +3953,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1132-DPP: ; %bb.0: ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB6_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -3987,26 +3987,26 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1132-DPP-NEXT: .LBB6_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -4019,19 +4019,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] @@ -4042,15 +4042,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -4077,19 +4077,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB7_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 +; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX7LESS-NEXT: .LBB7_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], v[41:42] @@ -4097,8 +4097,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 ; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 @@ -4107,43 +4107,43 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB7_4 ; GFX7LESS-NEXT: .LBB7_5: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s11 +; GFX9-NEXT: s_addc_u32 s65, s65, 0 ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_add_u32 s8, s36, 44 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -4152,17 +4152,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4189,12 +4189,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX9-NEXT: s_cbranch_execz .LBB7_5 ; GFX9-NEXT: ; %bb.3: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] +; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX9-NEXT: .LBB7_4: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -4205,54 +4205,54 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB7_4 ; GFX9-NEXT: .LBB7_5: ; GFX9-NEXT: s_endpgm ; ; GFX1064-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-NEXT: s_mov_b32 s42, s9 +; GFX1064-NEXT: s_mov_b32 s50, s9 ; GFX1064-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -4261,17 +4261,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 +; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4298,12 +4298,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1064-NEXT: s_cbranch_execz .LBB7_5 ; GFX1064-NEXT: ; %bb.3: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] +; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX1064-NEXT: .LBB7_4: ; %atomicrmw.start ; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-NEXT: s_waitcnt vmcnt(0) @@ -4313,56 +4313,56 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v4, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v4, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1064-NEXT: .LBB7_5: ; GFX1064-NEXT: s_endpgm ; ; GFX1032-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-NEXT: s_mov_b32 s42, s9 +; GFX1032-NEXT: s_mov_b32 s50, s9 ; GFX1032-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -4371,17 +4371,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 +; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4402,17 +4402,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-NEXT: s_cbranch_scc1 .LBB7_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1032-NEXT: s_cbranch_execz .LBB7_5 ; GFX1032-NEXT: ; %bb.3: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] +; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX1032-NEXT: .LBB7_4: ; %atomicrmw.start ; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-NEXT: s_waitcnt vmcnt(0) @@ -4422,38 +4422,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v4, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1032-NEXT: .LBB7_5: ; GFX1032-NEXT: s_endpgm @@ -4461,11 +4461,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX1164: ; %bb.0: ; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-NEXT: s_mov_b32 s42, s9 +; GFX1164-NEXT: s_mov_b32 s50, s9 ; GFX1164-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_getpc_b64 s[0:1] ; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -4473,15 +4473,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-NEXT: s_mov_b32 s33, s10 ; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v2, 0 @@ -4511,12 +4511,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1164-NEXT: s_cbranch_execz .LBB7_5 ; GFX1164-NEXT: ; %bb.3: -; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[44:45] +; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-NEXT: .p2align 6 ; GFX1164-NEXT: .LBB7_4: ; %atomicrmw.start @@ -4530,15 +4530,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX1164-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] ; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off @@ -4553,8 +4553,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1164-NEXT: .LBB7_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -4563,7 +4563,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX1132: ; %bb.0: ; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-NEXT: s_getpc_b64 s[0:1] @@ -4572,9 +4572,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s13 @@ -4583,7 +4583,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v2, 0 @@ -4606,19 +4606,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-NEXT: s_cbranch_scc1 .LBB7_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1132-NEXT: s_cbranch_execz .LBB7_5 ; GFX1132-NEXT: ; %bb.3: -; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[44:45] +; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[52:53] ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-NEXT: .p2align 6 ; GFX1132-NEXT: .LBB7_4: ; %atomicrmw.start @@ -4632,16 +4632,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX1132-NEXT: v_mov_b32_e32 v31, v40 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX1132-NEXT: v_mov_b32_e32 v3, s45 +; GFX1132-NEXT: v_mov_b32_e32 v3, s53 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: v_mov_b32_e32 v6, 0 -; GFX1132-NEXT: v_mov_b32_e32 v2, s44 +; GFX1132-NEXT: v_mov_b32_e32 v2, s52 ; GFX1132-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] ; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off ; GFX1132-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 8 @@ -4653,8 +4653,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1132-NEXT: .LBB7_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -4663,22 +4663,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -4689,34 +4689,34 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1] ; GFX7LESS-DPP-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 @@ -4725,43 +4725,43 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB7_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s54, -1 -; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11 -; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0 +; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s82, -1 +; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -4770,17 +4770,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4834,20 +4834,20 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 -; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63 -; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24 -; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0 +; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] ; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[44:45], s[44:45] +; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53] ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2] ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 @@ -4856,54 +4856,54 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4] -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49] +; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX9-DPP-NEXT: .LBB7_3: ; GFX9-DPP-NEXT: s_endpgm ; ; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -4912,17 +4912,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4975,10 +4975,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB7_2: ; %atomicrmw.start ; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42] @@ -4989,56 +4989,56 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4] ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1064-DPP-NEXT: .LBB7_3: ; GFX1064-DPP-NEXT: s_endpgm ; ; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -5047,17 +5047,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5098,15 +5098,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1032-DPP-NEXT: .LBB7_2: ; %atomicrmw.start ; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0) @@ -5116,38 +5116,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42] -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1032-DPP-NEXT: .LBB7_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -5155,11 +5155,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX1164-DPP: ; %bb.0: ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -5167,15 +5167,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5239,11 +5239,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-DPP-NEXT: .p2align 6 ; GFX1164-DPP-NEXT: .LBB7_2: ; %atomicrmw.start @@ -5261,18 +5261,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42] ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -5280,8 +5280,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1164-DPP-NEXT: .LBB7_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -5290,7 +5290,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX1132-DPP: ; %bb.0: ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] @@ -5299,9 +5299,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 @@ -5310,7 +5310,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -5356,16 +5356,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-DPP-NEXT: .p2align 6 ; GFX1132-DPP-NEXT: .LBB7_2: ; %atomicrmw.start @@ -5381,26 +5381,26 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42] ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1132-DPP-NEXT: .LBB7_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -5750,12 +5750,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -5773,8 +5773,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -5831,12 +5831,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; ; GFX9-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -5852,9 +5852,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5905,13 +5905,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; ; GFX1064-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -5927,8 +5927,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5964,13 +5964,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; ; GFX1032-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -5986,8 +5986,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6162,19 +6162,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -6187,11 +6187,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] ; GFX7LESS-DPP-NEXT: .LBB9_1: ; %atomicrmw.start @@ -6204,7 +6204,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -6217,12 +6217,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; ; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -6238,9 +6238,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6325,13 +6325,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; ; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -6347,8 +6347,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6409,13 +6409,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; ; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -6431,8 +6431,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6683,13 +6683,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 ; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 @@ -6697,15 +6697,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: s_cbranch_execz .LBB10_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 @@ -6717,8 +6717,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], 4.0 @@ -6726,8 +6726,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 @@ -6736,59 +6736,59 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_2 ; GFX7LESS-NEXT: .LBB10_3: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_add_u32 s64, s64, s11 ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-NEXT: s_cbranch_execz .LBB10_3 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-NEXT: .LBB10_2: ; %atomicrmw.start @@ -6801,36 +6801,36 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB10_2 ; GFX9-NEXT: .LBB10_3: ; GFX9-NEXT: s_endpgm @@ -6838,32 +6838,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1064-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe: ; GFX1064: ; %bb.0: ; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 ; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_add_u32 s64, s64, s11 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB10_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 -; GFX1064-NEXT: s_mov_b32 s42, s9 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_mov_b32 s50, s9 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_mov_b32_e32 v2, s1 ; GFX1064-NEXT: v_mov_b32_e32 v1, s0 @@ -6876,38 +6876,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1064-NEXT: .LBB10_3: ; GFX1064-NEXT: s_endpgm @@ -6915,31 +6915,31 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1032-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe: ; GFX1032: ; %bb.0: ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB10_3 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 -; GFX1032-NEXT: s_mov_b32 s42, s9 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_mov_b32 s50, s9 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_mov_b32_e32 v2, s1 ; GFX1032-NEXT: v_mov_b32_e32 v1, s0 @@ -6952,38 +6952,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1032-NEXT: .LBB10_3: ; GFX1032-NEXT: s_endpgm @@ -6992,7 +6992,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164: ; %bb.0: ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: s_mov_b64 s[0:1], exec ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -7000,16 +7000,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1164-NEXT: s_cbranch_execz .LBB10_3 ; GFX1164-NEXT: ; %bb.1: -; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-NEXT: s_mov_b32 s33, s10 -; GFX1164-NEXT: s_mov_b32 s42, s9 -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s50, s9 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: v_mov_b32_e32 v2, s1 ; GFX1164-NEXT: v_mov_b32_e32 v1, s0 @@ -7031,18 +7031,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 ; GFX1164-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7050,8 +7050,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1164-NEXT: .LBB10_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7061,22 +7061,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1132: ; %bb.0: ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_cbranch_execz .LBB10_3 ; GFX1132-NEXT: ; %bb.1: -; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-NEXT: s_mov_b32 s33, s15 -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 @@ -7095,26 +7095,26 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 ; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off -; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1132-NEXT: .LBB10_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7123,13 +7123,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, exec_lo, 0 ; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, exec_hi, v3 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 @@ -7137,15 +7137,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX7LESS-DPP-NEXT: ; %bb.1: ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 @@ -7157,8 +7157,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[2:3], 4.0 @@ -7166,8 +7166,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 @@ -7176,59 +7176,59 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX7LESS-DPP-NEXT: .LBB10_3: ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s66, -1 +; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000 ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start @@ -7241,36 +7241,36 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX9-DPP-NEXT: .LBB10_3: ; GFX9-DPP-NEXT: s_endpgm @@ -7278,32 +7278,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe: ; GFX1064-DPP: ; %bb.0: ; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 ; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, exec_hi, v3 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -7316,38 +7316,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1064-DPP-NEXT: .LBB10_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -7355,31 +7355,31 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_uni_value_default_scope_unsafe: ; GFX1032-DPP: ; %bb.0: ; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, exec_lo, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -7392,38 +7392,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1032-DPP-NEXT: .LBB10_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -7432,7 +7432,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-DPP: ; %bb.0: ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -7440,16 +7440,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -7471,18 +7471,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7490,8 +7490,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1164-DPP-NEXT: .LBB10_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7501,22 +7501,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1132-DPP: ; %bb.0: ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -7535,26 +7535,26 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1132-DPP-NEXT: .LBB10_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7567,19 +7567,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] @@ -7590,15 +7590,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -7625,19 +7625,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB11_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 +; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX7LESS-NEXT: .LBB11_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], v[41:42] @@ -7645,8 +7645,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 ; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 @@ -7655,43 +7655,43 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB11_4 ; GFX7LESS-NEXT: .LBB11_5: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s11 +; GFX9-NEXT: s_addc_u32 s65, s65, 0 ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_add_u32 s8, s36, 44 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -7700,17 +7700,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7737,12 +7737,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX9-NEXT: s_cbranch_execz .LBB11_5 ; GFX9-NEXT: ; %bb.3: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] +; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX9-NEXT: .LBB11_4: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -7753,54 +7753,54 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB11_4 ; GFX9-NEXT: .LBB11_5: ; GFX9-NEXT: s_endpgm ; ; GFX1064-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-NEXT: s_mov_b32 s42, s9 +; GFX1064-NEXT: s_mov_b32 s50, s9 ; GFX1064-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -7809,17 +7809,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 +; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7846,12 +7846,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1064-NEXT: s_cbranch_execz .LBB11_5 ; GFX1064-NEXT: ; %bb.3: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] +; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX1064-NEXT: .LBB11_4: ; %atomicrmw.start ; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-NEXT: s_waitcnt vmcnt(0) @@ -7861,56 +7861,56 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v4, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v4, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1064-NEXT: .LBB11_5: ; GFX1064-NEXT: s_endpgm ; ; GFX1032-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-NEXT: s_mov_b32 s42, s9 +; GFX1032-NEXT: s_mov_b32 s50, s9 ; GFX1032-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -7919,17 +7919,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 +; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7950,17 +7950,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-NEXT: s_cbranch_scc1 .LBB11_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1032-NEXT: s_cbranch_execz .LBB11_5 ; GFX1032-NEXT: ; %bb.3: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[44:45] +; GFX1032-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX1032-NEXT: .LBB11_4: ; %atomicrmw.start ; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-NEXT: s_waitcnt vmcnt(0) @@ -7970,38 +7970,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v5, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v5, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v4, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v5, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v4, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1032-NEXT: .LBB11_5: ; GFX1032-NEXT: s_endpgm @@ -8009,11 +8009,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX1164: ; %bb.0: ; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-NEXT: s_mov_b32 s42, s9 +; GFX1164-NEXT: s_mov_b32 s50, s9 ; GFX1164-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_getpc_b64 s[0:1] ; GFX1164-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1164-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -8021,15 +8021,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-NEXT: s_mov_b32 s33, s10 ; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v2, 0 @@ -8059,12 +8059,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1164-NEXT: s_cbranch_execz .LBB11_5 ; GFX1164-NEXT: ; %bb.3: -; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[44:45] +; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-NEXT: .p2align 6 ; GFX1164-NEXT: .LBB11_4: ; %atomicrmw.start @@ -8078,15 +8078,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX1164-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] ; GFX1164-NEXT: scratch_store_b64 off, v[4:5], off @@ -8101,8 +8101,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1164-NEXT: .LBB11_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8111,7 +8111,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX1132: ; %bb.0: ; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-NEXT: s_getpc_b64 s[0:1] @@ -8120,9 +8120,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s13 @@ -8131,7 +8131,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v2, 0 @@ -8154,19 +8154,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-NEXT: s_cbranch_scc1 .LBB11_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1132-NEXT: s_cbranch_execz .LBB11_5 ; GFX1132-NEXT: ; %bb.3: -; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[44:45] +; GFX1132-NEXT: global_load_b64 v[4:5], v0, s[52:53] ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-NEXT: .p2align 6 ; GFX1132-NEXT: .LBB11_4: ; %atomicrmw.start @@ -8180,16 +8180,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX1132-NEXT: v_mov_b32_e32 v31, v40 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX1132-NEXT: v_mov_b32_e32 v3, s45 +; GFX1132-NEXT: v_mov_b32_e32 v3, s53 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: v_mov_b32_e32 v6, 0 -; GFX1132-NEXT: v_mov_b32_e32 v2, s44 +; GFX1132-NEXT: v_mov_b32_e32 v2, s52 ; GFX1132-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] ; GFX1132-NEXT: scratch_store_b64 off, v[4:5], off ; GFX1132-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 8 @@ -8201,8 +8201,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1132-NEXT: .LBB11_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8211,22 +8211,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -8237,34 +8237,34 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v40, v0, v2 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[44:47], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1] ; GFX7LESS-DPP-NEXT: .LBB11_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 @@ -8273,43 +8273,43 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB11_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s54, -1 -; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11 -; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0 +; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s82, -1 +; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -8318,17 +8318,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8382,20 +8382,20 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 -; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63 -; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24 -; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0 +; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] ; GFX9-DPP-NEXT: .LBB11_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[44:45], s[44:45] +; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53] ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX9-DPP-NEXT: v_max_f64 v[5:6], v[1:2], v[1:2] ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 @@ -8404,54 +8404,54 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4] -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49] +; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX9-DPP-NEXT: .LBB11_3: ; GFX9-DPP-NEXT: s_endpgm ; ; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -8460,17 +8460,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8523,10 +8523,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB11_2: ; %atomicrmw.start ; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-DPP-NEXT: v_max_f64 v[3:4], v[41:42], v[41:42] @@ -8537,56 +8537,56 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4] ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1064-DPP-NEXT: .LBB11_3: ; GFX1064-DPP-NEXT: s_endpgm ; ; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -8595,17 +8595,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8646,15 +8646,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1032-DPP-NEXT: .LBB11_2: ; %atomicrmw.start ; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0) @@ -8664,38 +8664,38 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42] -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1032-DPP-NEXT: .LBB11_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -8703,11 +8703,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX1164-DPP: ; %bb.0: ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.double.value@gotpcrel32@lo+4 ; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.double.value@gotpcrel32@hi+12 @@ -8715,15 +8715,15 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -8787,11 +8787,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-DPP-NEXT: .p2align 6 ; GFX1164-DPP-NEXT: .LBB11_2: ; %atomicrmw.start @@ -8809,18 +8809,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42] ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -8828,8 +8828,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1164-DPP-NEXT: .LBB11_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8838,7 +8838,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX1132-DPP: ; %bb.0: ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] @@ -8847,9 +8847,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 @@ -8858,7 +8858,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -8904,16 +8904,16 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-DPP-NEXT: .p2align 6 ; GFX1132-DPP-NEXT: .LBB11_2: ; %atomicrmw.start @@ -8929,26 +8929,26 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX1132-DPP-NEXT: v_min_f64 v[3:4], v[3:4], v[41:42] ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1132-DPP-NEXT: .LBB11_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll index 7126680525b87..416ce5a031810 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll @@ -425,12 +425,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -448,8 +448,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -497,12 +497,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -518,9 +518,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -564,13 +564,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -586,8 +586,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -631,13 +631,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -653,8 +653,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -821,19 +821,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -846,11 +846,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -859,7 +859,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -871,12 +871,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -892,9 +892,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -953,13 +953,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -975,8 +975,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1030,13 +1030,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -1052,8 +1052,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1757,12 +1757,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -1780,8 +1780,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -1829,12 +1829,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; ; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -1850,9 +1850,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1896,13 +1896,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -1918,8 +1918,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1963,13 +1963,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -1985,8 +1985,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2153,19 +2153,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -2178,11 +2178,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -2191,7 +2191,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -2203,12 +2203,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; ; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -2224,9 +2224,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2285,13 +2285,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -2307,8 +2307,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2362,13 +2362,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -2384,8 +2384,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3089,12 +3089,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -3112,8 +3112,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -3161,12 +3161,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -3182,9 +3182,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3228,13 +3228,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -3250,8 +3250,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3295,13 +3295,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -3317,8 +3317,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3485,19 +3485,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -3510,11 +3510,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -3523,7 +3523,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -3535,12 +3535,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -3556,9 +3556,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3617,13 +3617,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -3639,8 +3639,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3694,13 +3694,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -3716,8 +3716,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3917,12 +3917,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -3940,8 +3940,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -3989,12 +3989,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -4010,9 +4010,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4056,13 +4056,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -4078,8 +4078,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4123,13 +4123,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -4145,8 +4145,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4313,19 +4313,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -4338,11 +4338,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -4351,7 +4351,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -4363,12 +4363,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -4384,9 +4384,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4445,13 +4445,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -4467,8 +4467,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4522,13 +4522,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -4544,8 +4544,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5248,12 +5248,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -5271,8 +5271,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -5320,12 +5320,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; ; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -5341,9 +5341,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5387,13 +5387,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -5409,8 +5409,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5454,16 +5454,16 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 -; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 -; GFX1032-NEXT: s_mov_b32 s12, s8 -; GFX1032-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-NEXT: s_mov_b32 s13, s9 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_mov_b32 s12, s8 +; GFX1032-NEXT: s_add_u32 s8, s34, 44 +; GFX1032-NEXT: s_mov_b32 s13, s9 ; GFX1032-NEXT: s_addc_u32 s9, s35, 0 ; GFX1032-NEXT: s_getpc_b64 s[4:5] ; GFX1032-NEXT: s_add_u32 s4, s4, div.float.value@gotpcrel32@lo+4 @@ -5476,8 +5476,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5644,19 +5644,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -5669,11 +5669,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB8_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -5682,7 +5682,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -5694,12 +5694,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; ; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -5715,9 +5715,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5776,13 +5776,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -5798,8 +5798,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5853,13 +5853,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -5875,8 +5875,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6075,14 +6075,14 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec ; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0 ; GFX7LESS-NEXT: v_mbcnt_hi_u32_b32_e32 v3, s1, v3 @@ -6091,16 +6091,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: s_cbranch_execz .LBB9_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[0:1] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX7LESS-NEXT: v_cvt_f64_u32_e32 v[1:2], s2 ; GFX7LESS-NEXT: v_or_b32_e32 v4, v0, v4 @@ -6113,11 +6113,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -6130,64 +6130,64 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB9_2 ; GFX7LESS-NEXT: .LBB9_3: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: s_mov_b64 s[0:1], exec -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v3, s0, 0 -; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_add_u32 s64, s64, s11 ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX9-NEXT: s_cbranch_execz .LBB9_3 ; GFX9-NEXT: ; %bb.1: ; GFX9-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-NEXT: v_cvt_f64_u32_e32 v[3:4], s0 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-NEXT: s_mov_b32 s42, s9 -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-NEXT: s_mov_b32 s50, s9 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, s1 -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: .LBB9_2: ; %atomicrmw.start @@ -6200,68 +6200,68 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v3, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v4, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v3, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB9_2 ; GFX9-NEXT: .LBB9_3: ; GFX9-NEXT: s_endpgm ; ; GFX1064-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[10:11], exec -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 ; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v3, s10, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v3, s11, v3 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB9_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-NEXT: s_bcnt1_i32_b64 s0, s[10:11] -; GFX1064-NEXT: s_mov_b32 s42, s9 +; GFX1064-NEXT: s_mov_b32 s50, s9 ; GFX1064-NEXT: v_cvt_f64_u32_e32 v[3:4], s0 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 ; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1 @@ -6278,69 +6278,69 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1064-NEXT: .LBB9_3: ; GFX1064-NEXT: s_endpgm ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s42, s9 +; GFX1032-NEXT: s_mov_b32 s50, s9 ; GFX1032-NEXT: s_mov_b32 s9, exec_lo -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB9_3 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-NEXT: s_bcnt1_i32_b32 s0, s9 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: v_cvt_f64_u32_e32 v[3:4], s0 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 ; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1 @@ -6357,37 +6357,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1032-NEXT: .LBB9_3: ; GFX1032-NEXT: s_endpgm @@ -6398,7 +6398,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-NEXT: s_mov_b64 s[10:11], exec ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, s10, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: s_mov_b64 s[0:1], exec ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -6407,16 +6407,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-NEXT: s_cbranch_execz .LBB9_3 ; GFX1164-NEXT: ; %bb.1: ; GFX1164-NEXT: s_bcnt1_i32_b64 s0, s[10:11] -; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX1164-NEXT: s_mov_b32 s42, s9 -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s50, s9 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -6439,18 +6439,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6458,8 +6458,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1164-NEXT: .LBB9_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6470,24 +6470,24 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1132-NEXT: s_mov_b32 s8, exec_lo ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_cbranch_execz .LBB9_3 ; GFX1132-NEXT: ; %bb.1: ; GFX1132-NEXT: s_bcnt1_i32_b32 s0, s8 -; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 ; GFX1132-NEXT: s_mov_b32 s33, s15 -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -6507,25 +6507,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1132-NEXT: .LBB9_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6534,14 +6534,14 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], exec ; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0 ; GFX7LESS-DPP-NEXT: v_mbcnt_hi_u32_b32_e32 v3, s1, v3 @@ -6550,16 +6550,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX7LESS-DPP-NEXT: ; %bb.1: ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s2, s[0:1] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX7LESS-DPP-NEXT: v_cvt_f64_u32_e32 v[1:2], s2 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v4, v0, v4 @@ -6572,11 +6572,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -6589,64 +6589,64 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX7LESS-DPP-NEXT: .LBB9_3: ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s66, -1 +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[0:1], exec -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000 ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s0, 0 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX9-DPP-NEXT: ; %bb.1: ; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s0, s[0:1] -; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: .LBB9_2: ; %atomicrmw.start @@ -6659,68 +6659,68 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v3, off, s[48:51], 0 -; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[48:51], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v3, off, s[64:67], 0 +; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX9-DPP-NEXT: .LBB9_3: ; GFX9-DPP-NEXT: s_endpgm ; ; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], exec -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX1064-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s10, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s11, v3 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s0, s[10:11] -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1064-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 @@ -6737,69 +6737,69 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1064-DPP-NEXT: .LBB9_3: ; GFX1064-DPP-NEXT: s_endpgm ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1032-DPP-NEXT: s_mov_b32 s9, exec_lo -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 ; GFX1032-DPP-NEXT: v_mbcnt_lo_u32_b32 v3, s9, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, s9 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: v_cvt_f64_u32_e32 v[3:4], s0 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 @@ -6816,37 +6816,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1032-DPP-NEXT: .LBB9_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -6857,7 +6857,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], exec ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s10, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: s_mov_b64 s[0:1], exec ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -6866,16 +6866,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX1164-DPP-NEXT: ; %bb.1: ; GFX1164-DPP-NEXT: s_bcnt1_i32_b64 s0, s[10:11] -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-DPP-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1164-DPP-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -6898,18 +6898,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -6917,8 +6917,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1164-DPP-NEXT: .LBB9_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6929,24 +6929,24 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s8, exec_lo ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB9_3 ; GFX1132-DPP-NEXT: ; %bb.1: ; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s0, s8 -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-DPP-NEXT: v_cvt_f64_u32_e32 v[0:1], s0 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-DPP-NEXT: v_mul_f64 v[41:42], v[0:1], 4.0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -6966,25 +6966,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1132-DPP-NEXT: .LBB9_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6997,19 +6997,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] @@ -7020,15 +7020,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -7053,21 +7053,21 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB10_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 +; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-NEXT: .LBB10_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -7080,44 +7080,44 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_4 ; GFX7LESS-NEXT: .LBB10_5: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s11 +; GFX9-NEXT: s_addc_u32 s65, s65, 0 ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_add_u32 s8, s36, 44 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7126,17 +7126,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7161,11 +7161,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX9-NEXT: s_cbranch_execz .LBB10_5 ; GFX9-NEXT: ; %bb.3: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX9-NEXT: .LBB10_4: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -7176,53 +7176,53 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB10_4 ; GFX9-NEXT: .LBB10_5: ; GFX9-NEXT: s_endpgm ; ; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-NEXT: s_mov_b32 s42, s9 +; GFX1064-NEXT: s_mov_b32 s50, s9 ; GFX1064-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7231,17 +7231,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 +; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7266,11 +7266,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1064-NEXT: s_cbranch_execz .LBB10_5 ; GFX1064-NEXT: ; %bb.3: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-NEXT: .LBB10_4: ; %atomicrmw.start ; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-NEXT: s_waitcnt vmcnt(0) @@ -7280,55 +7280,55 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1064-NEXT: .LBB10_5: ; GFX1064-NEXT: s_endpgm ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-NEXT: s_mov_b32 s42, s9 +; GFX1032-NEXT: s_mov_b32 s50, s9 ; GFX1032-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7337,17 +7337,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 +; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7366,16 +7366,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: s_cbranch_scc1 .LBB10_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1032-NEXT: s_cbranch_execz .LBB10_5 ; GFX1032-NEXT: ; %bb.3: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1032-NEXT: .LBB10_4: ; %atomicrmw.start ; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-NEXT: s_waitcnt vmcnt(0) @@ -7385,37 +7385,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1032-NEXT: .LBB10_5: ; GFX1032-NEXT: s_endpgm @@ -7423,11 +7423,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1164: ; %bb.0: ; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-NEXT: s_mov_b32 s42, s9 +; GFX1164-NEXT: s_mov_b32 s50, s9 ; GFX1164-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_getpc_b64 s[0:1] ; GFX1164-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1164-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7435,15 +7435,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-NEXT: s_mov_b32 s33, s10 ; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v41, 0 @@ -7471,11 +7471,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1164-NEXT: s_cbranch_execz .LBB10_5 ; GFX1164-NEXT: ; %bb.3: -; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-NEXT: .p2align 6 ; GFX1164-NEXT: .LBB10_4: ; %atomicrmw.start @@ -7492,18 +7492,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -7511,8 +7511,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1164-NEXT: .LBB10_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7521,7 +7521,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1132: ; %bb.0: ; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-NEXT: s_getpc_b64 s[0:1] @@ -7530,9 +7530,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s13 @@ -7541,7 +7541,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v41, 0 @@ -7561,17 +7561,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-NEXT: s_cbranch_scc1 .LBB10_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1132-NEXT: s_cbranch_execz .LBB10_5 ; GFX1132-NEXT: ; %bb.3: -; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-NEXT: .p2align 6 ; GFX1132-NEXT: .LBB10_4: ; %atomicrmw.start @@ -7586,25 +7586,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1132-NEXT: .LBB10_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7613,22 +7613,22 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -7639,30 +7639,30 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v42, v0, v2 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1 -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-DPP-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[40:41] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -7675,44 +7675,44 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s54, -1 -; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11 -; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0 +; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s82, -1 +; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7721,17 +7721,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7778,74 +7778,74 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 -; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63 -; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24 -; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0 +; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] ; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -s[44:45] +; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -s[52:53] ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49] +; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX9-DPP-NEXT: .LBB10_3: ; GFX9-DPP-NEXT: s_endpgm ; ; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7854,17 +7854,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7909,10 +7909,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start ; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0) @@ -7922,55 +7922,55 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1064-DPP-NEXT: .LBB10_3: ; GFX1064-DPP-NEXT: s_endpgm ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -7979,17 +7979,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8024,14 +8024,14 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1032-DPP-NEXT: .LBB10_2: ; %atomicrmw.start ; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0) @@ -8041,37 +8041,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1032-DPP-NEXT: .LBB10_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -8079,11 +8079,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1164-DPP: ; %bb.0: ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -8091,15 +8091,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -8153,10 +8153,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-DPP-NEXT: .p2align 6 ; GFX1164-DPP-NEXT: .LBB10_2: ; %atomicrmw.start @@ -8173,18 +8173,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -8192,8 +8192,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1164-DPP-NEXT: .LBB10_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8202,7 +8202,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1132-DPP: ; %bb.0: ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] @@ -8211,9 +8211,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 @@ -8222,7 +8222,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -8264,14 +8264,14 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-DPP-NEXT: .p2align 6 ; GFX1132-DPP-NEXT: .LBB10_2: ; %atomicrmw.start @@ -8286,25 +8286,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1132-DPP-NEXT: .LBB10_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8835,12 +8835,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -8858,8 +8858,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -8912,12 +8912,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; ; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -8933,9 +8933,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8982,13 +8982,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; ; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -9004,8 +9004,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9052,13 +9052,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -9074,8 +9074,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9249,19 +9249,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -9274,11 +9274,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB12_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -9289,7 +9289,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -9302,12 +9302,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; ; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -9323,9 +9323,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9401,13 +9401,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; ; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -9423,8 +9423,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9490,13 +9490,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -9512,8 +9512,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10268,12 +10268,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -10291,8 +10291,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -10345,12 +10345,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -10366,9 +10366,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10415,13 +10415,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -10437,8 +10437,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10485,13 +10485,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -10507,8 +10507,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10682,19 +10682,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -10707,11 +10707,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB14_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -10722,7 +10722,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -10735,12 +10735,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -10756,9 +10756,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10834,13 +10834,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -10856,8 +10856,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10923,13 +10923,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -10945,8 +10945,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11183,12 +11183,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s38, -1 -; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 -; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 +; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -11206,8 +11206,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -11260,12 +11260,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -11281,9 +11281,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11330,13 +11330,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s38, -1 -; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s50, -1 +; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -11352,8 +11352,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11400,13 +11400,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s38, -1 -; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s50, -1 +; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -11422,8 +11422,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11597,19 +11597,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s40, s40, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s41, s41, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -11622,11 +11622,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB15_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -11637,7 +11637,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -11650,12 +11650,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s38, -1 -; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 -; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s50, -1 +; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -11671,9 +11671,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11749,13 +11749,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -11771,8 +11771,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11838,13 +11838,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -11860,8 +11860,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -12097,13 +12097,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, 0 ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX7LESS-NEXT: s_mov_b32 s1, 0x43300000 @@ -12117,15 +12117,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: s_cbranch_execz .LBB16_3 ; GFX7LESS-NEXT: ; %bb.1: ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 @@ -12137,11 +12137,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -12154,40 +12154,40 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB16_2 ; GFX7LESS-NEXT: .LBB16_3: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s11 ; GFX9-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX9-NEXT: v_mov_b32_e32 v4, 0xc3300000 ; GFX9-NEXT: s_mov_b32 s1, 0x43300000 @@ -12200,20 +12200,20 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX9-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-NEXT: s_cbranch_execz .LBB16_3 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-NEXT: .LBB16_2: ; %atomicrmw.start @@ -12226,48 +12226,48 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB16_2 ; GFX9-NEXT: .LBB16_3: ; GFX9-NEXT: s_endpgm ; ; GFX1064-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s64, s64, s11 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX1064-NEXT: s_mov_b32 s1, 0x43300000 ; GFX1064-NEXT: s_movk_i32 s32, 0x800 @@ -12279,19 +12279,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1064-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-NEXT: s_cbranch_execz .LBB16_3 ; GFX1064-NEXT: ; %bb.1: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 -; GFX1064-NEXT: s_mov_b32 s42, s9 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_mov_b32 s50, s9 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: v_mov_b32_e32 v2, s1 ; GFX1064-NEXT: v_mov_b32_e32 v1, s0 @@ -12304,53 +12304,53 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1064-NEXT: .LBB16_3: ; GFX1064-NEXT: s_endpgm ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-NEXT: s_bcnt1_i32_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s1, 0x43300000 -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1] ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4] @@ -12359,18 +12359,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB16_3 ; GFX1032-NEXT: ; %bb.1: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 -; GFX1032-NEXT: s_mov_b32 s42, s9 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_mov_b32 s50, s9 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: v_mov_b32_e32 v2, s1 ; GFX1032-NEXT: v_mov_b32_e32 v1, s0 @@ -12383,44 +12383,44 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1032-NEXT: .LBB16_3: ; GFX1032-NEXT: s_endpgm ; ; GFX1164-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp: ; GFX1164: ; %bb.0: -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0x43300000 @@ -12441,16 +12441,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1164-NEXT: s_cbranch_execz .LBB16_3 ; GFX1164-NEXT: ; %bb.1: -; GFX1164-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-NEXT: s_mov_b32 s33, s10 -; GFX1164-NEXT: s_mov_b32 s42, s9 -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s50, s9 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: v_mov_b32_e32 v2, s1 ; GFX1164-NEXT: v_mov_b32_e32 v1, s0 @@ -12471,18 +12471,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -12490,8 +12490,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1164-NEXT: .LBB16_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12499,12 +12499,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; ; GFX1132-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp: ; GFX1132: ; %bb.0: -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-NEXT: s_bcnt1_i32_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0 ; GFX1132-NEXT: v_mov_b32_e32 v0, 0x43300000 -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_clause 0x1 ; GFX1132-NEXT: scratch_store_b32 off, v0, off offset:20 ; GFX1132-NEXT: scratch_store_b32 off, v1, off offset:16 @@ -12519,15 +12519,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_cbranch_execz .LBB16_3 ; GFX1132-NEXT: ; %bb.1: -; GFX1132-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-NEXT: s_mov_b32 s33, s15 -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 @@ -12545,25 +12545,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1132-NEXT: .LBB16_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12572,13 +12572,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v5, exec_lo, 0 ; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX7LESS-DPP-NEXT: s_mov_b32 s1, 0x43300000 @@ -12592,15 +12592,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: s_cbranch_execz .LBB16_3 ; GFX7LESS-DPP-NEXT: ; %bb.1: ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 @@ -12612,11 +12612,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -12629,40 +12629,40 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX7LESS-DPP-NEXT: .LBB16_3: ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX9-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s66, -1 +; GFX9-DPP-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX9-DPP-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0xc3300000 ; GFX9-DPP-NEXT: s_mov_b32 s1, 0x43300000 @@ -12675,20 +12675,20 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB16_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-DPP-NEXT: .LBB16_2: ; %atomicrmw.start @@ -12701,48 +12701,48 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX9-DPP-NEXT: .LBB16_3: ; GFX9-DPP-NEXT: s_endpgm ; ; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX1064-DPP-NEXT: s_mov_b32 s1, 0x43300000 ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 @@ -12754,19 +12754,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB16_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -12779,53 +12779,53 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1064-DPP-NEXT: .LBB16_3: ; GFX1064-DPP-NEXT: s_endpgm ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s1, 0x43300000 -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1] ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4] @@ -12834,18 +12834,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB16_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x24 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v4, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[44:45], 0x0 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -12858,44 +12858,44 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1032-DPP-NEXT: .LBB16_3: ; GFX1032-DPP-NEXT: s_endpgm ; ; GFX1164-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp: ; GFX1164-DPP: ; %bb.0: -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_bcnt1_i32_b64 s0, exec ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000 @@ -12916,16 +12916,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB16_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s1 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, s0 @@ -12946,18 +12946,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -12965,8 +12965,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1164-DPP-NEXT: .LBB16_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12974,12 +12974,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; ; GFX1132-DPP-LABEL: global_atomic_fsub_double_uni_address_uni_value_default_scope_strictfp: ; GFX1132-DPP: ; %bb.0: -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo ; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000 -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_clause 0x1 ; GFX1132-DPP-NEXT: scratch_store_b32 off, v0, off offset:20 ; GFX1132-DPP-NEXT: scratch_store_b32 off, v1, off offset:16 @@ -12994,15 +12994,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB16_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[4:5], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[4:5], 0x24 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[44:45], 0x0 +; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -13020,25 +13020,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1132-DPP-NEXT: .LBB16_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -13051,19 +13051,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] @@ -13074,15 +13074,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v40, v0, v2 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -13107,21 +13107,21 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB17_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 +; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-NEXT: .LBB17_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -13134,44 +13134,44 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB17_4 ; GFX7LESS-NEXT: .LBB17_5: ; GFX7LESS-NEXT: s_endpgm ; ; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s11 +; GFX9-NEXT: s_addc_u32 s65, s65, 0 ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b32 s43, s8 +; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_add_u32 s8, s36, 44 -; GFX9-NEXT: s_mov_b32 s42, s9 +; GFX9-NEXT: s_mov_b32 s50, s9 ; GFX9-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -13180,17 +13180,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -13215,11 +13215,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX9-NEXT: s_cbranch_execz .LBB17_5 ; GFX9-NEXT: ; %bb.3: -; GFX9-NEXT: s_load_dwordx2 s[44:45], s[36:37], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b64 s[46:47], 0 +; GFX9-NEXT: s_mov_b64 s[62:63], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX9-NEXT: .LBB17_4: ; %atomicrmw.start ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -13230,53 +13230,53 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], 0 -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX9-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s43 -; GFX9-NEXT: s_mov_b32 s13, s42 +; GFX9-NEXT: s_mov_b32 s12, s51 +; GFX9-NEXT: s_mov_b32 s13, s50 ; GFX9-NEXT: s_mov_b32 s14, s33 ; GFX9-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s44 -; GFX9-NEXT: v_mov_b32_e32 v3, s45 +; GFX9-NEXT: v_mov_b32_e32 v2, s52 +; GFX9-NEXT: v_mov_b32_e32 v3, s53 ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX9-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX9-NEXT: s_cbranch_execnz .LBB17_4 ; GFX9-NEXT: .LBB17_5: ; GFX9-NEXT: s_endpgm ; ; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s66, -1 +; GFX1064-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-NEXT: s_mov_b32 s43, s8 +; GFX1064-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-NEXT: s_mov_b32 s42, s9 +; GFX1064-NEXT: s_mov_b32 s50, s9 ; GFX1064-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -13285,17 +13285,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 +; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -13320,11 +13320,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1064-NEXT: s_cbranch_execz .LBB17_5 ; GFX1064-NEXT: ; %bb.3: -; GFX1064-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-NEXT: .LBB17_4: ; %atomicrmw.start ; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-NEXT: s_waitcnt vmcnt(0) @@ -13334,55 +13334,55 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-NEXT: s_getpc_b64 s[0:1] ; GFX1064-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-NEXT: s_mov_b32 s12, s43 -; GFX1064-NEXT: s_mov_b32 s13, s42 +; GFX1064-NEXT: s_mov_b32 s12, s51 +; GFX1064-NEXT: s_mov_b32 s13, s50 ; GFX1064-NEXT: s_mov_b32 s14, s33 -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-NEXT: s_clause 0x1 -; GFX1064-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1064-NEXT: .LBB17_5: ; GFX1064-NEXT: s_endpgm ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s66, -1 +; GFX1032-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-NEXT: s_mov_b32 s43, s8 +; GFX1032-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-NEXT: s_mov_b32 s42, s9 +; GFX1032-NEXT: s_mov_b32 s50, s9 ; GFX1032-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -13391,17 +13391,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 +; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -13420,16 +13420,16 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-NEXT: s_cbranch_scc1 .LBB17_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s46, 0 +; GFX1032-NEXT: s_mov_b32 s62, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1032-NEXT: s_cbranch_execz .LBB17_5 ; GFX1032-NEXT: ; %bb.3: -; GFX1032-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1032-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1032-NEXT: .LBB17_4: ; %atomicrmw.start ; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-NEXT: s_waitcnt vmcnt(0) @@ -13439,37 +13439,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-NEXT: s_getpc_b64 s[0:1] ; GFX1032-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-NEXT: s_mov_b32 s12, s43 -; GFX1032-NEXT: s_mov_b32 s13, s42 +; GFX1032-NEXT: s_mov_b32 s12, s51 +; GFX1032-NEXT: s_mov_b32 s13, s50 ; GFX1032-NEXT: s_mov_b32 s14, s33 -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-NEXT: s_clause 0x1 -; GFX1032-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1032-NEXT: .LBB17_5: ; GFX1032-NEXT: s_endpgm @@ -13477,11 +13477,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX1164: ; %bb.0: ; GFX1164-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-NEXT: s_mov_b32 s43, s8 +; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-NEXT: s_mov_b32 s42, s9 +; GFX1164-NEXT: s_mov_b32 s50, s9 ; GFX1164-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-NEXT: s_getpc_b64 s[0:1] ; GFX1164-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1164-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -13489,15 +13489,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-NEXT: s_mov_b32 s33, s10 ; GFX1164-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v41, 0 @@ -13525,11 +13525,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX1164-NEXT: s_cbranch_execz .LBB17_5 ; GFX1164-NEXT: ; %bb.3: -; GFX1164-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-NEXT: .p2align 6 ; GFX1164-NEXT: .LBB17_4: ; %atomicrmw.start @@ -13546,18 +13546,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-NEXT: s_mov_b32 s12, s43 -; GFX1164-NEXT: s_mov_b32 s13, s42 +; GFX1164-NEXT: s_mov_b32 s12, s51 +; GFX1164-NEXT: s_mov_b32 s13, s50 ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -13565,8 +13565,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1164-NEXT: .LBB17_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -13575,7 +13575,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX1132: ; %bb.0: ; GFX1132-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-NEXT: s_getpc_b64 s[0:1] @@ -13584,9 +13584,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-NEXT: s_mov_b32 s42, s14 -; GFX1132-NEXT: s_mov_b32 s43, s13 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-NEXT: s_mov_b32 s50, s14 +; GFX1132-NEXT: s_mov_b32 s51, s13 +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s13 @@ -13595,7 +13595,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v41, 0 @@ -13615,17 +13615,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-NEXT: s_cbranch_scc1 .LBB17_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s46, 0 +; GFX1132-NEXT: s_mov_b32 s62, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1132-NEXT: s_xor_b32 s0, exec_lo, s0 ; GFX1132-NEXT: s_cbranch_execz .LBB17_5 ; GFX1132-NEXT: ; %bb.3: -; GFX1132-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-NEXT: v_mov_b32_e32 v0, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1132-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-NEXT: .p2align 6 ; GFX1132-NEXT: .LBB17_4: ; %atomicrmw.start @@ -13640,25 +13640,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-NEXT: s_mov_b32 s12, s43 -; GFX1132-NEXT: s_mov_b32 s13, s42 +; GFX1132-NEXT: s_mov_b32 s12, s51 +; GFX1132-NEXT: s_mov_b32 s13, s50 ; GFX1132-NEXT: s_mov_b32 s14, s33 ; GFX1132-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1132-NEXT: .LBB17_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -13667,22 +13667,22 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 -; GFX7LESS-DPP-NEXT: s_mov_b32 s42, s9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s43, s8 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[44:45], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s47, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s46, -1 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -13693,30 +13693,30 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v42, v0, v2 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1 -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[44:47], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 ; GFX7LESS-DPP-NEXT: .LBB17_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[40:41] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -13729,44 +13729,44 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s43 -; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s42 +; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 +; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s44 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[48:51], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB17_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s54, -1 -; GFX9-DPP-NEXT: s_mov_b32 s55, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s52, s52, s11 -; GFX9-DPP-NEXT: s_addc_u32 s53, s53, 0 +; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s82, -1 +; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 +; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b32 s43, s8 +; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX9-DPP-NEXT: s_mov_b32 s42, s9 +; GFX9-DPP-NEXT: s_mov_b32 s50, s9 ; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX9-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX9-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -13775,17 +13775,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -13832,74 +13832,74 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v1, exec_hi, v1 ; GFX9-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 -; GFX9-DPP-NEXT: v_readlane_b32 s45, v9, 63 -; GFX9-DPP-NEXT: v_readlane_b32 s44, v8, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s53, v9, 63 +; GFX9-DPP-NEXT: v_readlane_b32 s52, v8, 63 ; GFX9-DPP-NEXT: s_mov_b64 exec, s[0:1] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[46:47], s[36:37], 0x24 -; GFX9-DPP-NEXT: s_mov_b64 s[48:49], 0 +; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[46:47] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] ; GFX9-DPP-NEXT: .LBB17_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) -; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -s[44:45] +; GFX9-DPP-NEXT: v_add_f64 v[3:4], v[1:2], -s[52:53] ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX9-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX9-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[52:53] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[52:55], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[52:55], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[52:55], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-DPP-NEXT: s_mov_b32 s12, s43 -; GFX9-DPP-NEXT: s_mov_b32 s13, s42 +; GFX9-DPP-NEXT: s_mov_b32 s12, s51 +; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[54:55] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s46 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s47 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[52:55], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[52:55], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[48:49], vcc, s[48:49] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[48:49] +; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[64:65] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX9-DPP-NEXT: .LBB17_3: ; GFX9-DPP-NEXT: s_endpgm ; ; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s67, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1064-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1064-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1064-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1064-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1064-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1064-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -13908,17 +13908,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1064-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -13963,10 +13963,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX1064-DPP-NEXT: ; %bb.1: -; GFX1064-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 +; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB17_2: ; %atomicrmw.start ; GFX1064-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1064-DPP-NEXT: s_waitcnt vmcnt(0) @@ -13976,55 +13976,55 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1064-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1064-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1064-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1064-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1064-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1064-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1064-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1064-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1064-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1064-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1064-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1064-DPP-NEXT: s_clause 0x1 -; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1064-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[46:47] +; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1064-DPP-NEXT: .LBB17_3: ; GFX1064-DPP-NEXT: s_endpgm ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s66, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 -; GFX1032-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1032-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1032-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1032-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -14033,17 +14033,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -14078,14 +14078,14 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX1032-DPP-NEXT: ; %bb.1: -; GFX1032-DPP-NEXT: s_load_dwordx2 s[44:45], s[34:35], 0x24 +; GFX1032-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[44:45] +; GFX1032-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1032-DPP-NEXT: .LBB17_2: ; %atomicrmw.start ; GFX1032-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1032-DPP-NEXT: s_waitcnt vmcnt(0) @@ -14095,37 +14095,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1032-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX1032-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[48:51], 0 offset:4 -; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[48:51], 0 +; GFX1032-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX1032-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1032-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1032-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1032-DPP-NEXT: s_mov_b32 s14, s33 -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] -; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[48:51], 0 offset:12 -; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[48:51], 0 offset:8 -; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX1032-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 +; GFX1032-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 +; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX1032-DPP-NEXT: s_clause 0x1 -; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 -; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 offset:4 +; GFX1032-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 +; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s46 +; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1032-DPP-NEXT: .LBB17_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -14133,11 +14133,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX1164-DPP: ; %bb.0: ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b32 s43, s8 +; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_add_u32 s8, s34, 44 -; GFX1164-DPP-NEXT: s_mov_b32 s42, s9 +; GFX1164-DPP-NEXT: s_mov_b32 s50, s9 ; GFX1164-DPP-NEXT: s_addc_u32 s9, s35, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1164-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1164-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX1164-DPP-NEXT: s_add_u32 s0, s0, div.float.value@gotpcrel32@lo+4 ; GFX1164-DPP-NEXT: s_addc_u32 s1, s1, div.float.value@gotpcrel32@hi+12 @@ -14145,15 +14145,15 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1164-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -14207,10 +14207,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX1164-DPP-NEXT: ; %bb.1: -; GFX1164-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], 0 +; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 +; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1164-DPP-NEXT: .p2align 6 ; GFX1164-DPP-NEXT: .LBB17_2: ; %atomicrmw.start @@ -14227,18 +14227,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v31, v40 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s44 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v2, s52 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1164-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1164-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s45 +; GFX1164-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] @@ -14246,8 +14246,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[46:47], vcc, s[46:47] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[46:47] +; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1164-DPP-NEXT: .LBB17_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -14256,7 +14256,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX1132-DPP: ; %bb.0: ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[40:41], s[0:1] +; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1132-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1132-DPP-NEXT: s_addc_u32 s9, s35, 0 ; GFX1132-DPP-NEXT: s_getpc_b64 s[0:1] @@ -14265,9 +14265,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_mov_b32_e32 v31, v0 ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1132-DPP-NEXT: s_mov_b32 s42, s14 -; GFX1132-DPP-NEXT: s_mov_b32 s43, s13 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] +; GFX1132-DPP-NEXT: s_mov_b32 s50, s14 +; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s13 @@ -14276,7 +14276,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -14318,14 +14318,14 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s46, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX1132-DPP-NEXT: ; %bb.1: -; GFX1132-DPP-NEXT: s_load_b64 s[44:45], s[34:35], 0x24 +; GFX1132-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[44:45] +; GFX1132-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX1132-DPP-NEXT: .p2align 6 ; GFX1132-DPP-NEXT: .LBB17_2: ; %atomicrmw.start @@ -14340,25 +14340,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[1:2], off ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v31, v40 :: v_dual_mov_b32 v0, 8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s44 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s52 ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 -; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] -; GFX1132-DPP-NEXT: s_mov_b32 s12, s43 -; GFX1132-DPP-NEXT: s_mov_b32 s13, s42 +; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 +; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 ; GFX1132-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1132-DPP-NEXT: scratch_store_b64 off, v[3:4], off offset:8 -; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s45 :: v_dual_mov_b32 v4, 0 +; GFX1132-DPP-NEXT: v_dual_mov_b32 v3, s53 :: v_dual_mov_b32 v4, 0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: scratch_load_b64 v[1:2], off, off ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s46, vcc_lo, s46 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s46 +; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1132-DPP-NEXT: .LBB17_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 diff --git a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir index dde84af57ed25..8ae89ad96a16b 100644 --- a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir @@ -31,102 +31,105 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr34_sgpr35 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: renamable $sgpr33 = COPY $sgpr15 - ; CHECK-NEXT: renamable $sgpr42 = COPY $sgpr14 + ; CHECK-NEXT: renamable $sgpr50 = COPY $sgpr14 ; CHECK-NEXT: renamable $sgpr36_sgpr37 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY $sgpr6_sgpr7 - ; CHECK-NEXT: renamable $sgpr40_sgpr41 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: renamable $sgpr66_sgpr67 = S_LOAD_DWORDX2_IMM renamable $sgpr34_sgpr35, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) - ; CHECK-NEXT: renamable $sgpr44 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr45 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr46 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr47 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr48 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr49 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr50 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr51 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr52 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr53 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr54 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr55 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr56 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr57 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr58 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr59 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr60 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr61 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr62 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr63 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: renamable $sgpr62_sgpr63 = S_LOAD_DWORDX2_IMM renamable $sgpr34_sgpr35, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; CHECK-NEXT: renamable $sgpr64 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr68_sgpr69 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr65 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr66 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr67 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr68 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr69 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr70 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr71 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr72 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr73 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr74 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr75 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr76 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr77 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr78 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr79 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr80 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr81 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr82 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr83 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr84 = S_MOV_B32 0 + ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr52_sgpr53 = IMPLICIT_DEF ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL renamable $sgpr68_sgpr69, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL renamable $sgpr52_sgpr53, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY killed renamable $sgpr40_sgpr41 - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr38_sgpr39 + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY killed renamable $sgpr48_sgpr49 + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr46_sgpr47 ; CHECK-NEXT: $sgpr8_sgpr9 = COPY killed renamable $sgpr34_sgpr35 ; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr36_sgpr37 - ; CHECK-NEXT: $sgpr12 = COPY killed renamable $sgpr42 + ; CHECK-NEXT: $sgpr12 = COPY killed renamable $sgpr50 ; CHECK-NEXT: $sgpr13 = COPY killed renamable $sgpr33 - ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr68_sgpr69, 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr52_sgpr53, 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY $exec, implicit-def $exec ; CHECK-NEXT: dead renamable $sgpr6_sgpr7 = IMPLICIT_DEF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) - ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr66_sgpr67:0x000000000000000F, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x000003FFFFFFFFFF + ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr62_sgpr63:0x000000000000000F ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY $exec, implicit-def $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr66_sgpr67:0x000000000000000F, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x000003FFFFFFFFFF + ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr62_sgpr63:0x000000000000000F ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 - ; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr67, 1, implicit-def dead $scc + ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr63, 1, implicit-def dead $scc ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 [[COPY]], 0, killed $sgpr6, 3, implicit-def $m0, implicit $m0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000) - ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr66_sgpr67:0x000000000000000F, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x000003FFFFFFFFFF + ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr62_sgpr63:0x000000000000000F, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95:0x0000000000000003 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK-NEXT: renamable $sgpr68 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr69 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr70 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr71 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr72 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr73 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr74 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr75 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr76 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr77 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr78 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr79 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr80 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr81 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr82 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr83 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr84 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr85 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr86 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr87 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr88 = COPY renamable $sgpr44 - ; CHECK-NEXT: renamable $sgpr89 = COPY renamable $sgpr44 - ; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, implicit $exec + ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr43 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr47 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr49 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr50 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr51 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr53 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr54 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr55 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr56 = COPY renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr64 + ; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) - ; CHECK-NEXT: liveins: $sgpr6_sgpr7, $sgpr66_sgpr67:0x0000000000000003, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x000003FFFFFFFFFF + ; CHECK-NEXT: liveins: $sgpr6_sgpr7, $sgpr62_sgpr63:0x0000000000000003 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc - ; CHECK-NEXT: dead renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr66, 1, implicit-def dead $scc - ; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 + ; CHECK-NEXT: dead renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr62, 1, implicit-def dead $scc + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:vreg_1024 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: bb.0: @@ -211,7 +214,6 @@ body: | %15.sub19:sgpr_1024 = COPY %7.sub0 %15.sub20:sgpr_1024 = COPY %7.sub0 %15.sub21:sgpr_1024 = COPY %7.sub0 - ; Spill code ends up getting inserted here, and we end up with many unspillable sgpr1024 ranges %16:vreg_1024 = COPY %15, implicit $exec $exec = S_XOR_B64_term $exec, %14, implicit-def $scc S_CBRANCH_EXECZ %bb.5, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll index 81eac63ae5bdf..1a8557d25fb92 100644 --- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll +++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll @@ -8,115 +8,140 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: v_writelane_b32 v5, s30, 0 -; CHECK-NEXT: v_writelane_b32 v5, s31, 1 -; CHECK-NEXT: v_writelane_b32 v5, s34, 2 -; CHECK-NEXT: v_writelane_b32 v5, s35, 3 -; CHECK-NEXT: v_writelane_b32 v5, s36, 4 -; CHECK-NEXT: v_writelane_b32 v5, s37, 5 -; CHECK-NEXT: v_writelane_b32 v5, s38, 6 -; CHECK-NEXT: v_writelane_b32 v5, s39, 7 -; CHECK-NEXT: v_writelane_b32 v5, s40, 8 -; CHECK-NEXT: v_writelane_b32 v5, s41, 9 -; CHECK-NEXT: v_writelane_b32 v5, s42, 10 -; CHECK-NEXT: v_writelane_b32 v5, s43, 11 -; CHECK-NEXT: v_writelane_b32 v5, s44, 12 -; CHECK-NEXT: v_writelane_b32 v5, s45, 13 -; CHECK-NEXT: v_writelane_b32 v5, s46, 14 +; CHECK-NEXT: v_writelane_b32 v5, s36, 0 +; CHECK-NEXT: v_writelane_b32 v5, s37, 1 +; CHECK-NEXT: v_writelane_b32 v5, s46, 2 +; CHECK-NEXT: v_writelane_b32 v5, s47, 3 +; CHECK-NEXT: v_writelane_b32 v5, s48, 4 +; CHECK-NEXT: v_writelane_b32 v5, s49, 5 +; CHECK-NEXT: v_writelane_b32 v5, s50, 6 +; CHECK-NEXT: v_writelane_b32 v5, s51, 7 ; CHECK-NEXT: s_getpc_b64 s[24:25] -; CHECK-NEXT: v_writelane_b32 v5, s47, 15 -; CHECK-NEXT: s_movk_i32 s20, 0xf0 +; CHECK-NEXT: v_writelane_b32 v5, s52, 8 +; CHECK-NEXT: s_movk_i32 s4, 0xf0 +; CHECK-NEXT: s_mov_b32 s5, s24 +; CHECK-NEXT: v_writelane_b32 v5, s53, 9 +; CHECK-NEXT: s_load_dwordx16 s[44:59], s[4:5], 0x0 +; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane +; CHECK-NEXT: s_mov_b64 s[4:5], 0 +; CHECK-NEXT: s_load_dwordx4 s[40:43], s[4:5], 0x0 +; CHECK-NEXT: s_movk_i32 s20, 0x130 ; CHECK-NEXT: s_mov_b32 s21, s24 -; CHECK-NEXT: v_writelane_b32 v5, s48, 16 -; CHECK-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0 -; CHECK-NEXT: s_mov_b64 s[20:21], 0 -; CHECK-NEXT: v_writelane_b32 v5, s49, 17 -; CHECK-NEXT: s_load_dwordx4 s[20:23], s[20:21], 0x0 -; CHECK-NEXT: v_writelane_b32 v5, s50, 18 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s22, 0x130 -; CHECK-NEXT: s_mov_b32 s23, s24 -; CHECK-NEXT: v_writelane_b32 v5, s51, 19 -; CHECK-NEXT: s_load_dwordx16 s[36:51], s[22:23], 0x0 -; CHECK-NEXT: s_mov_b32 s28, 0 +; CHECK-NEXT: v_writelane_b32 v7, s44, 0 +; CHECK-NEXT: v_writelane_b32 v7, s45, 1 +; CHECK-NEXT: v_writelane_b32 v7, s46, 2 +; CHECK-NEXT: v_writelane_b32 v7, s47, 3 +; CHECK-NEXT: v_writelane_b32 v7, s48, 4 +; CHECK-NEXT: v_writelane_b32 v7, s49, 5 +; CHECK-NEXT: v_writelane_b32 v7, s50, 6 +; CHECK-NEXT: v_writelane_b32 v7, s51, 7 +; CHECK-NEXT: v_writelane_b32 v7, s52, 8 +; CHECK-NEXT: v_writelane_b32 v7, s53, 9 +; CHECK-NEXT: v_writelane_b32 v7, s54, 10 +; CHECK-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0 +; CHECK-NEXT: v_writelane_b32 v7, s55, 11 +; CHECK-NEXT: v_writelane_b32 v7, s56, 12 +; CHECK-NEXT: s_mov_b32 s20, 0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: v_mov_b32_e32 v2, s20 +; CHECK-NEXT: v_writelane_b32 v7, s57, 13 +; CHECK-NEXT: v_mov_b32_e32 v2, s40 ; CHECK-NEXT: v_mov_b32_e32 v3, v1 -; CHECK-NEXT: s_mov_b32 s29, s28 -; CHECK-NEXT: s_mov_b32 s30, s28 -; CHECK-NEXT: s_mov_b32 s31, s28 -; CHECK-NEXT: image_sample_lz v3, v[2:3], s[12:19], s[28:31] dmask:0x1 +; CHECK-NEXT: s_mov_b32 s21, s20 +; CHECK-NEXT: s_mov_b32 s22, s20 +; CHECK-NEXT: s_mov_b32 s23, s20 +; CHECK-NEXT: v_writelane_b32 v7, s58, 14 +; CHECK-NEXT: v_writelane_b32 v7, s59, 15 +; CHECK-NEXT: image_sample_lz v3, v[2:3], s[52:59], s[20:23] dmask:0x1 ; CHECK-NEXT: v_mov_b32_e32 v2, v1 -; CHECK-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane -; CHECK-NEXT: v_writelane_b32 v5, s52, 20 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_writelane_b32 v6, s36, 0 -; CHECK-NEXT: v_writelane_b32 v5, s53, 21 -; CHECK-NEXT: v_writelane_b32 v5, s54, 22 -; CHECK-NEXT: v_writelane_b32 v5, s55, 23 -; CHECK-NEXT: v_writelane_b32 v5, s56, 24 -; CHECK-NEXT: v_writelane_b32 v6, s37, 1 -; CHECK-NEXT: image_sample_lz v4, v[1:2], s[36:43], s[28:31] dmask:0x1 -; CHECK-NEXT: v_writelane_b32 v5, s57, 25 -; CHECK-NEXT: v_writelane_b32 v6, s38, 2 -; CHECK-NEXT: v_writelane_b32 v5, s58, 26 -; CHECK-NEXT: v_writelane_b32 v6, s39, 3 -; CHECK-NEXT: v_writelane_b32 v5, s59, 27 -; CHECK-NEXT: v_writelane_b32 v6, s40, 4 -; CHECK-NEXT: v_writelane_b32 v5, s60, 28 -; CHECK-NEXT: v_writelane_b32 v6, s41, 5 -; CHECK-NEXT: v_writelane_b32 v5, s61, 29 -; CHECK-NEXT: v_writelane_b32 v6, s42, 6 -; CHECK-NEXT: v_writelane_b32 v5, s62, 30 -; CHECK-NEXT: v_writelane_b32 v6, s43, 7 -; CHECK-NEXT: v_writelane_b32 v5, s63, 31 -; CHECK-NEXT: v_writelane_b32 v6, s44, 8 -; CHECK-NEXT: v_writelane_b32 v5, s64, 32 -; CHECK-NEXT: v_writelane_b32 v6, s45, 9 -; CHECK-NEXT: v_writelane_b32 v5, s65, 33 -; CHECK-NEXT: v_writelane_b32 v6, s46, 10 -; CHECK-NEXT: v_writelane_b32 v5, s66, 34 -; CHECK-NEXT: v_writelane_b32 v6, s47, 11 -; CHECK-NEXT: v_writelane_b32 v5, s67, 35 -; CHECK-NEXT: v_writelane_b32 v6, s48, 12 -; CHECK-NEXT: v_writelane_b32 v5, s68, 36 -; CHECK-NEXT: v_writelane_b32 v6, s49, 13 -; CHECK-NEXT: v_writelane_b32 v5, s69, 37 -; CHECK-NEXT: v_writelane_b32 v6, s50, 14 -; CHECK-NEXT: s_mov_b32 s34, 48 -; CHECK-NEXT: s_movk_i32 s52, 0x1f0 -; CHECK-NEXT: s_movk_i32 s68, 0x2f0 -; CHECK-NEXT: s_mov_b32 s35, s24 -; CHECK-NEXT: s_mov_b32 s53, s24 -; CHECK-NEXT: s_mov_b32 s69, s24 -; CHECK-NEXT: v_writelane_b32 v6, s51, 15 -; CHECK-NEXT: s_load_dwordx8 s[20:27], s[34:35], 0x0 -; CHECK-NEXT: s_load_dwordx16 s[36:51], s[52:53], 0x0 +; CHECK-NEXT: v_writelane_b32 v7, s4, 16 +; CHECK-NEXT: v_writelane_b32 v7, s5, 17 +; CHECK-NEXT: v_writelane_b32 v7, s6, 18 +; CHECK-NEXT: v_writelane_b32 v7, s7, 19 +; CHECK-NEXT: v_writelane_b32 v7, s8, 20 +; CHECK-NEXT: v_writelane_b32 v7, s9, 21 +; CHECK-NEXT: image_sample_lz v4, v[1:2], s[4:11], s[20:23] dmask:0x1 +; CHECK-NEXT: v_writelane_b32 v7, s10, 22 +; CHECK-NEXT: v_writelane_b32 v7, s11, 23 +; CHECK-NEXT: v_writelane_b32 v7, s12, 24 +; CHECK-NEXT: v_writelane_b32 v7, s13, 25 +; CHECK-NEXT: v_writelane_b32 v7, s14, 26 +; CHECK-NEXT: v_writelane_b32 v7, s15, 27 +; CHECK-NEXT: v_writelane_b32 v7, s16, 28 +; CHECK-NEXT: v_writelane_b32 v7, s17, 29 +; CHECK-NEXT: v_writelane_b32 v7, s18, 30 +; CHECK-NEXT: s_mov_b32 s26, 48 +; CHECK-NEXT: s_mov_b32 s27, s24 +; CHECK-NEXT: v_writelane_b32 v7, s19, 31 +; CHECK-NEXT: s_load_dwordx8 s[4:11], s[26:27], 0x0 +; CHECK-NEXT: v_writelane_b32 v5, s62, 10 +; CHECK-NEXT: v_writelane_b32 v5, s63, 11 +; CHECK-NEXT: v_writelane_b32 v5, s64, 12 +; CHECK-NEXT: v_writelane_b32 v5, s65, 13 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_writelane_b32 v7, s4, 32 +; CHECK-NEXT: v_writelane_b32 v7, s5, 33 +; CHECK-NEXT: v_writelane_b32 v7, s6, 34 +; CHECK-NEXT: v_writelane_b32 v7, s7, 35 +; CHECK-NEXT: v_writelane_b32 v7, s8, 36 +; CHECK-NEXT: v_writelane_b32 v7, s9, 37 +; CHECK-NEXT: v_writelane_b32 v5, s66, 14 +; CHECK-NEXT: s_movk_i32 s28, 0x1f0 +; CHECK-NEXT: s_movk_i32 s70, 0x2f0 +; CHECK-NEXT: s_mov_b32 s29, s24 +; CHECK-NEXT: s_mov_b32 s71, s24 +; CHECK-NEXT: v_writelane_b32 v7, s10, 38 +; CHECK-NEXT: v_writelane_b32 v5, s67, 15 +; CHECK-NEXT: v_writelane_b32 v7, s11, 39 +; CHECK-NEXT: s_load_dwordx16 s[52:67], s[28:29], 0x0 +; CHECK-NEXT: s_load_dwordx16 s[4:19], s[70:71], 0x0 ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 -; CHECK-NEXT: s_load_dwordx16 s[52:67], s[68:69], 0x0 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; CHECK-NEXT: v_writelane_b32 v5, s70, 38 -; CHECK-NEXT: s_xor_b64 s[34:35], vcc, -1 -; CHECK-NEXT: v_writelane_b32 v5, s71, 39 +; CHECK-NEXT: s_xor_b64 s[24:25], vcc, -1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_mul_f32_e32 v0, v4, v3 -; CHECK-NEXT: s_and_saveexec_b64 vcc, s[34:35] -; CHECK-NEXT: s_xor_b64 s[68:69], exec, vcc +; CHECK-NEXT: s_and_saveexec_b64 s[26:27], s[24:25] +; CHECK-NEXT: s_xor_b64 s[26:27], exec, s[26:27] ; CHECK-NEXT: s_cbranch_execz .LBB0_3 ; CHECK-NEXT: ; %bb.1: ; %bb48 -; CHECK-NEXT: image_sample_lz v3, v[1:2], s[12:19], s[28:31] dmask:0x1 -; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: v_readlane_b32 s36, v7, 0 +; CHECK-NEXT: v_readlane_b32 s44, v7, 8 +; CHECK-NEXT: v_readlane_b32 s45, v7, 9 +; CHECK-NEXT: v_readlane_b32 s46, v7, 10 +; CHECK-NEXT: v_readlane_b32 s47, v7, 11 +; CHECK-NEXT: v_readlane_b32 s48, v7, 12 +; CHECK-NEXT: v_readlane_b32 s49, v7, 13 +; CHECK-NEXT: v_readlane_b32 s50, v7, 14 +; CHECK-NEXT: v_readlane_b32 s51, v7, 15 ; CHECK-NEXT: s_and_b64 vcc, exec, -1 +; CHECK-NEXT: v_readlane_b32 s37, v7, 1 +; CHECK-NEXT: v_readlane_b32 s38, v7, 2 +; CHECK-NEXT: v_readlane_b32 s39, v7, 3 +; CHECK-NEXT: v_readlane_b32 s40, v7, 4 +; CHECK-NEXT: image_sample_lz v3, v[1:2], s[44:51], s[20:23] dmask:0x1 +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: v_readlane_b32 s41, v7, 5 +; CHECK-NEXT: v_readlane_b32 s42, v7, 6 +; CHECK-NEXT: v_readlane_b32 s43, v7, 7 ; CHECK-NEXT: .LBB0_2: ; %bb50 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: s_mov_b32 s29, s28 -; CHECK-NEXT: s_mov_b32 s30, s28 -; CHECK-NEXT: s_mov_b32 s31, s28 +; CHECK-NEXT: v_readlane_b32 s36, v7, 32 +; CHECK-NEXT: v_readlane_b32 s40, v7, 36 +; CHECK-NEXT: v_readlane_b32 s41, v7, 37 +; CHECK-NEXT: v_readlane_b32 s42, v7, 38 +; CHECK-NEXT: v_readlane_b32 s43, v7, 39 +; CHECK-NEXT: s_mov_b32 s21, s20 +; CHECK-NEXT: s_mov_b32 s22, s20 +; CHECK-NEXT: s_mov_b32 s23, s20 +; CHECK-NEXT: v_readlane_b32 s37, v7, 33 +; CHECK-NEXT: v_readlane_b32 s38, v7, 34 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: image_sample_lz v4, v[1:2], s[44:51], s[24:27] dmask:0x1 -; CHECK-NEXT: s_nop 0 -; CHECK-NEXT: image_sample_lz v1, v[1:2], s[60:67], s[28:31] dmask:0x1 +; CHECK-NEXT: image_sample_lz v4, v[1:2], s[60:67], s[40:43] dmask:0x1 +; CHECK-NEXT: v_readlane_b32 s39, v7, 35 +; CHECK-NEXT: image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_sub_f32_e32 v1, v1, v4 ; CHECK-NEXT: v_mul_f32_e32 v1, v1, v0 @@ -124,69 +149,159 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_mov_b64 vcc, vcc ; CHECK-NEXT: s_cbranch_vccnz .LBB0_2 ; CHECK-NEXT: .LBB0_3: ; %Flow14 -; CHECK-NEXT: s_andn2_saveexec_b64 s[12:13], s[68:69] +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_readlane_b32 s12, v7, 32 +; CHECK-NEXT: v_readlane_b32 s13, v7, 33 +; CHECK-NEXT: v_readlane_b32 s14, v7, 34 +; CHECK-NEXT: v_readlane_b32 s15, v7, 35 +; CHECK-NEXT: v_readlane_b32 s16, v7, 36 +; CHECK-NEXT: v_readlane_b32 s17, v7, 37 +; CHECK-NEXT: v_readlane_b32 s18, v7, 38 +; CHECK-NEXT: v_readlane_b32 s19, v7, 39 +; CHECK-NEXT: v_writelane_b32 v7, s4, 40 +; CHECK-NEXT: v_writelane_b32 v7, s5, 41 +; CHECK-NEXT: v_writelane_b32 v7, s6, 42 +; CHECK-NEXT: v_writelane_b32 v7, s7, 43 +; CHECK-NEXT: v_writelane_b32 v7, s8, 44 +; CHECK-NEXT: v_writelane_b32 v7, s9, 45 +; CHECK-NEXT: v_writelane_b32 v7, s10, 46 +; CHECK-NEXT: v_writelane_b32 v7, s11, 47 +; CHECK-NEXT: v_writelane_b32 v7, s12, 48 +; CHECK-NEXT: v_writelane_b32 v7, s13, 49 +; CHECK-NEXT: v_writelane_b32 v7, s14, 50 +; CHECK-NEXT: v_writelane_b32 v7, s15, 51 +; CHECK-NEXT: v_writelane_b32 v7, s16, 52 +; CHECK-NEXT: v_writelane_b32 v7, s17, 53 +; CHECK-NEXT: v_writelane_b32 v7, s18, 54 +; CHECK-NEXT: v_writelane_b32 v7, s19, 55 +; CHECK-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane +; CHECK-NEXT: v_writelane_b32 v7, s52, 56 +; CHECK-NEXT: v_writelane_b32 v6, s60, 0 +; CHECK-NEXT: v_writelane_b32 v7, s53, 57 +; CHECK-NEXT: v_writelane_b32 v6, s61, 1 +; CHECK-NEXT: v_writelane_b32 v7, s54, 58 +; CHECK-NEXT: v_writelane_b32 v6, s62, 2 +; CHECK-NEXT: v_writelane_b32 v7, s55, 59 +; CHECK-NEXT: v_writelane_b32 v6, s63, 3 +; CHECK-NEXT: v_writelane_b32 v7, s56, 60 +; CHECK-NEXT: v_writelane_b32 v6, s64, 4 +; CHECK-NEXT: v_writelane_b32 v7, s57, 61 +; CHECK-NEXT: v_writelane_b32 v6, s65, 5 +; CHECK-NEXT: v_writelane_b32 v7, s58, 62 +; CHECK-NEXT: v_writelane_b32 v6, s66, 6 +; CHECK-NEXT: v_writelane_b32 v7, s59, 63 +; CHECK-NEXT: v_writelane_b32 v6, s67, 7 +; CHECK-NEXT: s_andn2_saveexec_b64 s[20:21], s[26:27] ; CHECK-NEXT: s_cbranch_execz .LBB0_10 ; CHECK-NEXT: ; %bb.4: ; %bb32 -; CHECK-NEXT: s_and_saveexec_b64 s[14:15], s[34:35] -; CHECK-NEXT: s_xor_b64 s[14:15], exec, s[14:15] +; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[24:25] +; CHECK-NEXT: s_xor_b64 s[22:23], exec, s[8:9] ; CHECK-NEXT: s_cbranch_execz .LBB0_6 ; CHECK-NEXT: ; %bb.5: ; %bb43 -; CHECK-NEXT: s_mov_b32 s16, 0 -; CHECK-NEXT: s_mov_b32 s17, s16 -; CHECK-NEXT: v_mov_b32_e32 v2, s16 -; CHECK-NEXT: v_mov_b32_e32 v3, s17 -; CHECK-NEXT: s_mov_b32 s18, s16 -; CHECK-NEXT: s_mov_b32 s19, s16 -; CHECK-NEXT: image_sample_lz v1, v[2:3], s[4:11], s[16:19] dmask:0x1 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_mov_b64 s[4:5], s[36:37] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] -; CHECK-NEXT: s_mov_b64 s[8:9], s[40:41] -; CHECK-NEXT: s_mov_b64 s[10:11], s[42:43] -; CHECK-NEXT: v_readlane_b32 s36, v6, 0 -; CHECK-NEXT: v_readlane_b32 s44, v6, 8 -; CHECK-NEXT: v_readlane_b32 s45, v6, 9 -; CHECK-NEXT: v_readlane_b32 s46, v6, 10 -; CHECK-NEXT: v_readlane_b32 s47, v6, 11 -; CHECK-NEXT: v_readlane_b32 s48, v6, 12 -; CHECK-NEXT: v_readlane_b32 s49, v6, 13 -; CHECK-NEXT: v_readlane_b32 s50, v6, 14 -; CHECK-NEXT: v_readlane_b32 s51, v6, 15 -; CHECK-NEXT: v_readlane_b32 s37, v6, 1 -; CHECK-NEXT: v_readlane_b32 s38, v6, 2 -; CHECK-NEXT: v_readlane_b32 s39, v6, 3 -; CHECK-NEXT: v_readlane_b32 s40, v6, 4 -; CHECK-NEXT: v_readlane_b32 s41, v6, 5 -; CHECK-NEXT: image_sample_lz v0, v[2:3], s[44:51], s[20:23] dmask:0x1 -; CHECK-NEXT: v_readlane_b32 s42, v6, 6 -; CHECK-NEXT: v_readlane_b32 s43, v6, 7 -; CHECK-NEXT: v_mov_b32_e32 v2, 0 -; CHECK-NEXT: s_mov_b64 s[42:43], s[10:11] -; CHECK-NEXT: v_mov_b32_e32 v3, v2 -; CHECK-NEXT: s_mov_b64 s[40:41], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] -; CHECK-NEXT: s_mov_b64 s[36:37], s[4:5] +; CHECK-NEXT: s_mov_b32 s8, 0 +; CHECK-NEXT: s_mov_b32 s9, s8 +; CHECK-NEXT: v_mov_b32_e32 v0, s8 +; CHECK-NEXT: v_readlane_b32 s36, v7, 0 +; CHECK-NEXT: v_mov_b32_e32 v1, s9 +; CHECK-NEXT: s_mov_b32 s10, s8 +; CHECK-NEXT: s_mov_b32 s11, s8 +; CHECK-NEXT: v_readlane_b32 s37, v7, 1 +; CHECK-NEXT: v_readlane_b32 s38, v7, 2 +; CHECK-NEXT: v_readlane_b32 s39, v7, 3 +; CHECK-NEXT: v_readlane_b32 s40, v7, 4 +; CHECK-NEXT: v_readlane_b32 s41, v7, 5 +; CHECK-NEXT: v_readlane_b32 s42, v7, 6 +; CHECK-NEXT: v_readlane_b32 s43, v7, 7 +; CHECK-NEXT: v_readlane_b32 s44, v7, 8 +; CHECK-NEXT: v_readlane_b32 s45, v7, 9 +; CHECK-NEXT: v_readlane_b32 s46, v7, 10 +; CHECK-NEXT: v_readlane_b32 s47, v7, 11 +; CHECK-NEXT: v_readlane_b32 s48, v7, 12 +; CHECK-NEXT: v_readlane_b32 s49, v7, 13 +; CHECK-NEXT: v_readlane_b32 s50, v7, 14 +; CHECK-NEXT: v_readlane_b32 s51, v7, 15 +; CHECK-NEXT: image_sample_lz v2, v[0:1], s[36:43], s[8:11] dmask:0x1 +; CHECK-NEXT: v_readlane_b32 s36, v7, 16 +; CHECK-NEXT: v_readlane_b32 s44, v7, 24 +; CHECK-NEXT: v_readlane_b32 s45, v7, 25 +; CHECK-NEXT: v_readlane_b32 s46, v7, 26 +; CHECK-NEXT: v_readlane_b32 s47, v7, 27 +; CHECK-NEXT: v_readlane_b32 s48, v7, 28 +; CHECK-NEXT: v_readlane_b32 s49, v7, 29 +; CHECK-NEXT: v_readlane_b32 s50, v7, 30 +; CHECK-NEXT: v_readlane_b32 s51, v7, 31 +; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: v_mov_b32_e32 v4, v3 +; CHECK-NEXT: v_readlane_b32 s37, v7, 17 +; CHECK-NEXT: v_readlane_b32 s38, v7, 18 +; CHECK-NEXT: v_readlane_b32 s39, v7, 19 +; CHECK-NEXT: image_sample_lz v0, v[0:1], s[44:51], s[12:15] dmask:0x1 +; CHECK-NEXT: v_readlane_b32 s40, v7, 20 +; CHECK-NEXT: v_readlane_b32 s41, v7, 21 +; CHECK-NEXT: v_readlane_b32 s42, v7, 22 +; CHECK-NEXT: v_readlane_b32 s43, v7, 23 ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: buffer_store_dwordx3 v[1:3], off, s[16:19], 0 +; CHECK-NEXT: buffer_store_dwordx3 v[2:4], off, s[8:11], 0 ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 +; CHECK-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 ; CHECK-NEXT: ; implicit-def: $vgpr0 ; CHECK-NEXT: .LBB0_6: ; %Flow12 -; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[14:15] +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[22:23] +; CHECK-NEXT: v_readlane_b32 s52, v7, 40 +; CHECK-NEXT: v_readlane_b32 s53, v7, 41 +; CHECK-NEXT: v_readlane_b32 s54, v7, 42 +; CHECK-NEXT: v_readlane_b32 s55, v7, 43 +; CHECK-NEXT: v_readlane_b32 s56, v7, 44 +; CHECK-NEXT: v_readlane_b32 s57, v7, 45 +; CHECK-NEXT: v_readlane_b32 s58, v7, 46 +; CHECK-NEXT: v_readlane_b32 s59, v7, 47 +; CHECK-NEXT: v_readlane_b32 s60, v7, 48 +; CHECK-NEXT: v_readlane_b32 s61, v7, 49 +; CHECK-NEXT: v_readlane_b32 s62, v7, 50 +; CHECK-NEXT: v_readlane_b32 s63, v7, 51 +; CHECK-NEXT: v_readlane_b32 s64, v7, 52 +; CHECK-NEXT: v_readlane_b32 s65, v7, 53 +; CHECK-NEXT: v_readlane_b32 s66, v7, 54 +; CHECK-NEXT: v_readlane_b32 s67, v7, 55 +; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5] ; CHECK-NEXT: s_cbranch_execz .LBB0_9 ; CHECK-NEXT: ; %bb.7: ; %bb33.preheader ; CHECK-NEXT: s_mov_b32 s8, 0 ; CHECK-NEXT: s_mov_b32 s6, s8 ; CHECK-NEXT: s_mov_b32 s7, s8 ; CHECK-NEXT: v_mov_b32_e32 v1, s6 +; CHECK-NEXT: v_readlane_b32 s36, v7, 56 ; CHECK-NEXT: s_mov_b32 s9, s8 ; CHECK-NEXT: s_mov_b32 s10, s8 ; CHECK-NEXT: s_mov_b32 s11, s8 ; CHECK-NEXT: v_mov_b32_e32 v2, s7 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_readlane_b32 s37, v7, 57 +; CHECK-NEXT: v_readlane_b32 s38, v7, 58 +; CHECK-NEXT: v_readlane_b32 s39, v7, 59 +; CHECK-NEXT: v_readlane_b32 s40, v7, 60 +; CHECK-NEXT: v_readlane_b32 s41, v7, 61 +; CHECK-NEXT: v_readlane_b32 s42, v7, 62 +; CHECK-NEXT: v_readlane_b32 s43, v7, 63 +; CHECK-NEXT: s_nop 4 ; CHECK-NEXT: image_sample_lz v3, v[1:2], s[36:43], s[8:11] dmask:0x1 ; CHECK-NEXT: image_sample_lz v4, v[1:2], s[52:59], s[8:11] dmask:0x1 +; CHECK-NEXT: ; kill: killed $vgpr1_vgpr2 +; CHECK-NEXT: s_mov_b64 s[12:13], s[36:37] ; CHECK-NEXT: s_and_b64 vcc, exec, 0 +; CHECK-NEXT: v_readlane_b32 s44, v6, 0 +; CHECK-NEXT: v_readlane_b32 s45, v6, 1 +; CHECK-NEXT: v_readlane_b32 s46, v6, 2 +; CHECK-NEXT: v_readlane_b32 s47, v6, 3 +; CHECK-NEXT: v_readlane_b32 s48, v6, 4 +; CHECK-NEXT: v_readlane_b32 s49, v6, 5 +; CHECK-NEXT: v_readlane_b32 s50, v6, 6 +; CHECK-NEXT: v_readlane_b32 s51, v6, 7 +; CHECK-NEXT: s_mov_b64 s[14:15], s[38:39] +; CHECK-NEXT: s_mov_b64 s[16:17], s[40:41] +; CHECK-NEXT: s_mov_b64 s[18:19], s[42:43] +; CHECK-NEXT: ; kill: killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 +; CHECK-NEXT: ; kill: killed $sgpr8_sgpr9_sgpr10 killed $sgpr11 +; CHECK-NEXT: ; kill: killed $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_sub_f32_e32 v1, v4, v3 ; CHECK-NEXT: v_mul_f32_e32 v0, v1, v0 @@ -200,51 +315,27 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: .LBB0_9: ; %Flow13 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: .LBB0_10: ; %UnifiedReturnBlock -; CHECK-NEXT: s_or_b64 exec, exec, s[12:13] -; CHECK-NEXT: v_readlane_b32 s71, v5, 39 -; CHECK-NEXT: v_readlane_b32 s70, v5, 38 -; CHECK-NEXT: v_readlane_b32 s69, v5, 37 -; CHECK-NEXT: v_readlane_b32 s68, v5, 36 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_readlane_b32 s67, v5, 35 -; CHECK-NEXT: v_readlane_b32 s66, v5, 34 -; CHECK-NEXT: v_readlane_b32 s65, v5, 33 -; CHECK-NEXT: v_readlane_b32 s64, v5, 32 -; CHECK-NEXT: v_readlane_b32 s63, v5, 31 -; CHECK-NEXT: v_readlane_b32 s62, v5, 30 -; CHECK-NEXT: v_readlane_b32 s61, v5, 29 -; CHECK-NEXT: v_readlane_b32 s60, v5, 28 -; CHECK-NEXT: v_readlane_b32 s59, v5, 27 -; CHECK-NEXT: v_readlane_b32 s58, v5, 26 -; CHECK-NEXT: v_readlane_b32 s57, v5, 25 -; CHECK-NEXT: v_readlane_b32 s56, v5, 24 -; CHECK-NEXT: v_readlane_b32 s55, v5, 23 -; CHECK-NEXT: v_readlane_b32 s54, v5, 22 -; CHECK-NEXT: v_readlane_b32 s53, v5, 21 -; CHECK-NEXT: v_readlane_b32 s52, v5, 20 -; CHECK-NEXT: v_readlane_b32 s51, v5, 19 -; CHECK-NEXT: v_readlane_b32 s50, v5, 18 -; CHECK-NEXT: v_readlane_b32 s49, v5, 17 -; CHECK-NEXT: v_readlane_b32 s48, v5, 16 -; CHECK-NEXT: v_readlane_b32 s47, v5, 15 -; CHECK-NEXT: v_readlane_b32 s46, v5, 14 -; CHECK-NEXT: v_readlane_b32 s45, v5, 13 -; CHECK-NEXT: v_readlane_b32 s44, v5, 12 -; CHECK-NEXT: v_readlane_b32 s43, v5, 11 -; CHECK-NEXT: v_readlane_b32 s42, v5, 10 -; CHECK-NEXT: v_readlane_b32 s41, v5, 9 -; CHECK-NEXT: v_readlane_b32 s40, v5, 8 -; CHECK-NEXT: v_readlane_b32 s39, v5, 7 -; CHECK-NEXT: v_readlane_b32 s38, v5, 6 -; CHECK-NEXT: v_readlane_b32 s37, v5, 5 -; CHECK-NEXT: v_readlane_b32 s36, v5, 4 -; CHECK-NEXT: v_readlane_b32 s35, v5, 3 -; CHECK-NEXT: v_readlane_b32 s34, v5, 2 -; CHECK-NEXT: v_readlane_b32 s31, v5, 1 -; CHECK-NEXT: v_readlane_b32 s30, v5, 0 +; CHECK-NEXT: s_or_b64 exec, exec, s[20:21] +; CHECK-NEXT: v_readlane_b32 s67, v5, 15 +; CHECK-NEXT: v_readlane_b32 s66, v5, 14 +; CHECK-NEXT: v_readlane_b32 s65, v5, 13 +; CHECK-NEXT: v_readlane_b32 s64, v5, 12 +; CHECK-NEXT: v_readlane_b32 s63, v5, 11 +; CHECK-NEXT: v_readlane_b32 s62, v5, 10 +; CHECK-NEXT: v_readlane_b32 s53, v5, 9 +; CHECK-NEXT: v_readlane_b32 s52, v5, 8 +; CHECK-NEXT: v_readlane_b32 s51, v5, 7 +; CHECK-NEXT: v_readlane_b32 s50, v5, 6 +; CHECK-NEXT: v_readlane_b32 s49, v5, 5 +; CHECK-NEXT: v_readlane_b32 s48, v5, 4 +; CHECK-NEXT: v_readlane_b32 s47, v5, 3 +; CHECK-NEXT: v_readlane_b32 s46, v5, 2 +; CHECK-NEXT: v_readlane_b32 s37, v5, 1 +; CHECK-NEXT: v_readlane_b32 s36, v5, 0 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll index 55da485b91f67..8487e195de8e2 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -134,59 +134,59 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 +; GCN-NEXT: v_writelane_b32 v40, s46, 6 +; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s48, 8 +; GCN-NEXT: v_writelane_b32 v40, s49, 9 +; GCN-NEXT: v_writelane_b32 v40, s50, 10 +; GCN-NEXT: v_writelane_b32 v40, s51, 11 +; GCN-NEXT: v_writelane_b32 v40, s52, 12 +; GCN-NEXT: v_writelane_b32 v40, s53, 13 +; GCN-NEXT: v_writelane_b32 v40, s62, 14 +; GCN-NEXT: v_writelane_b32 v40, s63, 15 +; GCN-NEXT: v_writelane_b32 v40, s64, 16 +; GCN-NEXT: v_writelane_b32 v40, s65, 17 +; GCN-NEXT: s_mov_b32 s50, s15 +; GCN-NEXT: s_mov_b32 s51, s14 +; GCN-NEXT: s_mov_b32 s52, s13 +; GCN-NEXT: s_mov_b32 s53, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] -; GCN-NEXT: s_mov_b64 s[46:47], exec +; GCN-NEXT: s_mov_b64 s[46:47], s[6:7] +; GCN-NEXT: s_mov_b64 s[48:49], s[4:5] +; GCN-NEXT: s_mov_b64 s[62:63], exec ; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s16, v0 ; GCN-NEXT: v_readfirstlane_b32 s17, v1 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN-NEXT: s_and_saveexec_b64 s[64:65], vcc +; GCN-NEXT: s_mov_b64 s[4:5], s[48:49] +; GCN-NEXT: s_mov_b64 s[6:7], s[46:47] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 +; GCN-NEXT: s_mov_b32 s12, s53 +; GCN-NEXT: s_mov_b32 s13, s52 +; GCN-NEXT: s_mov_b32 s14, s51 +; GCN-NEXT: s_mov_b32 s15, s50 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GCN-NEXT: ; implicit-def: $vgpr31 -; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] +; GCN-NEXT: s_xor_b64 exec, exec, s[64:65] ; GCN-NEXT: s_cbranch_execnz .LBB2_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: s_mov_b64 exec, s[62:63] +; GCN-NEXT: v_readlane_b32 s65, v40, 17 +; GCN-NEXT: v_readlane_b32 s64, v40, 16 +; GCN-NEXT: v_readlane_b32 s63, v40, 15 +; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s53, v40, 13 +; GCN-NEXT: v_readlane_b32 s52, v40, 12 +; GCN-NEXT: v_readlane_b32 s51, v40, 11 +; GCN-NEXT: v_readlane_b32 s50, v40, 10 +; GCN-NEXT: v_readlane_b32 s49, v40, 9 +; GCN-NEXT: v_readlane_b32 s48, v40, 8 +; GCN-NEXT: v_readlane_b32 s47, v40, 7 +; GCN-NEXT: v_readlane_b32 s46, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -218,59 +218,59 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: s_mov_b32 s42, s15 -; GISEL-NEXT: s_mov_b32 s43, s14 -; GISEL-NEXT: s_mov_b32 s44, s13 -; GISEL-NEXT: s_mov_b32 s45, s12 +; GISEL-NEXT: v_writelane_b32 v40, s46, 6 +; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s48, 8 +; GISEL-NEXT: v_writelane_b32 v40, s49, 9 +; GISEL-NEXT: v_writelane_b32 v40, s50, 10 +; GISEL-NEXT: v_writelane_b32 v40, s51, 11 +; GISEL-NEXT: v_writelane_b32 v40, s52, 12 +; GISEL-NEXT: v_writelane_b32 v40, s53, 13 +; GISEL-NEXT: v_writelane_b32 v40, s62, 14 +; GISEL-NEXT: v_writelane_b32 v40, s63, 15 +; GISEL-NEXT: v_writelane_b32 v40, s64, 16 +; GISEL-NEXT: v_writelane_b32 v40, s65, 17 +; GISEL-NEXT: s_mov_b32 s50, s15 +; GISEL-NEXT: s_mov_b32 s51, s14 +; GISEL-NEXT: s_mov_b32 s52, s13 +; GISEL-NEXT: s_mov_b32 s53, s12 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] -; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: s_mov_b64 s[46:47], exec +; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7] +; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5] +; GISEL-NEXT: s_mov_b64 s[62:63], exec ; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] -; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] +; GISEL-NEXT: s_and_saveexec_b64 s[64:65], vcc +; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49] +; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47] ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s45 -; GISEL-NEXT: s_mov_b32 s13, s44 -; GISEL-NEXT: s_mov_b32 s14, s43 -; GISEL-NEXT: s_mov_b32 s15, s42 +; GISEL-NEXT: s_mov_b32 s12, s53 +; GISEL-NEXT: s_mov_b32 s13, s52 +; GISEL-NEXT: s_mov_b32 s14, s51 +; GISEL-NEXT: s_mov_b32 s15, s50 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: ; implicit-def: $vgpr31 -; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] +; GISEL-NEXT: s_xor_b64 exec, exec, s[64:65] ; GISEL-NEXT: s_cbranch_execnz .LBB2_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: s_mov_b64 exec, s[62:63] +; GISEL-NEXT: v_readlane_b32 s65, v40, 17 +; GISEL-NEXT: v_readlane_b32 s64, v40, 16 +; GISEL-NEXT: v_readlane_b32 s63, v40, 15 +; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s53, v40, 13 +; GISEL-NEXT: v_readlane_b32 s52, v40, 12 +; GISEL-NEXT: v_readlane_b32 s51, v40, 11 +; GISEL-NEXT: v_readlane_b32 s50, v40, 10 +; GISEL-NEXT: v_readlane_b32 s49, v40, 9 +; GISEL-NEXT: v_readlane_b32 s48, v40, 8 +; GISEL-NEXT: v_readlane_b32 s47, v40, 7 +; GISEL-NEXT: v_readlane_b32 s46, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 @@ -306,62 +306,62 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 +; GCN-NEXT: v_writelane_b32 v40, s46, 6 +; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s48, 8 +; GCN-NEXT: v_writelane_b32 v40, s49, 9 +; GCN-NEXT: v_writelane_b32 v40, s50, 10 +; GCN-NEXT: v_writelane_b32 v40, s51, 11 +; GCN-NEXT: v_writelane_b32 v40, s52, 12 +; GCN-NEXT: v_writelane_b32 v40, s53, 13 +; GCN-NEXT: v_writelane_b32 v40, s62, 14 +; GCN-NEXT: v_writelane_b32 v40, s63, 15 +; GCN-NEXT: v_writelane_b32 v40, s64, 16 +; GCN-NEXT: v_writelane_b32 v40, s65, 17 +; GCN-NEXT: s_mov_b32 s50, s15 +; GCN-NEXT: s_mov_b32 s51, s14 +; GCN-NEXT: s_mov_b32 s52, s13 +; GCN-NEXT: s_mov_b32 s53, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] -; GCN-NEXT: s_mov_b64 s[46:47], exec +; GCN-NEXT: s_mov_b64 s[46:47], s[6:7] +; GCN-NEXT: s_mov_b64 s[48:49], s[4:5] +; GCN-NEXT: s_mov_b64 s[62:63], exec ; GCN-NEXT: v_mov_b32_e32 v2, 0x7b ; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s16, v0 ; GCN-NEXT: v_readfirstlane_b32 s17, v1 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN-NEXT: s_and_saveexec_b64 s[64:65], vcc +; GCN-NEXT: s_mov_b64 s[4:5], s[48:49] +; GCN-NEXT: s_mov_b64 s[6:7], s[46:47] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 +; GCN-NEXT: s_mov_b32 s12, s53 +; GCN-NEXT: s_mov_b32 s13, s52 +; GCN-NEXT: s_mov_b32 s14, s51 +; GCN-NEXT: s_mov_b32 s15, s50 ; GCN-NEXT: v_mov_b32_e32 v0, v2 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GCN-NEXT: ; implicit-def: $vgpr31 ; GCN-NEXT: ; implicit-def: $vgpr2 -; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] +; GCN-NEXT: s_xor_b64 exec, exec, s[64:65] ; GCN-NEXT: s_cbranch_execnz .LBB3_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: s_mov_b64 exec, s[62:63] +; GCN-NEXT: v_readlane_b32 s65, v40, 17 +; GCN-NEXT: v_readlane_b32 s64, v40, 16 +; GCN-NEXT: v_readlane_b32 s63, v40, 15 +; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s53, v40, 13 +; GCN-NEXT: v_readlane_b32 s52, v40, 12 +; GCN-NEXT: v_readlane_b32 s51, v40, 11 +; GCN-NEXT: v_readlane_b32 s50, v40, 10 +; GCN-NEXT: v_readlane_b32 s49, v40, 9 +; GCN-NEXT: v_readlane_b32 s48, v40, 8 +; GCN-NEXT: v_readlane_b32 s47, v40, 7 +; GCN-NEXT: v_readlane_b32 s46, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -393,60 +393,60 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: s_mov_b32 s42, s15 -; GISEL-NEXT: s_mov_b32 s43, s14 -; GISEL-NEXT: s_mov_b32 s44, s13 -; GISEL-NEXT: s_mov_b32 s45, s12 +; GISEL-NEXT: v_writelane_b32 v40, s46, 6 +; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s48, 8 +; GISEL-NEXT: v_writelane_b32 v40, s49, 9 +; GISEL-NEXT: v_writelane_b32 v40, s50, 10 +; GISEL-NEXT: v_writelane_b32 v40, s51, 11 +; GISEL-NEXT: v_writelane_b32 v40, s52, 12 +; GISEL-NEXT: v_writelane_b32 v40, s53, 13 +; GISEL-NEXT: v_writelane_b32 v40, s62, 14 +; GISEL-NEXT: v_writelane_b32 v40, s63, 15 +; GISEL-NEXT: v_writelane_b32 v40, s64, 16 +; GISEL-NEXT: v_writelane_b32 v40, s65, 17 +; GISEL-NEXT: s_mov_b32 s50, s15 +; GISEL-NEXT: s_mov_b32 s51, s14 +; GISEL-NEXT: s_mov_b32 s52, s13 +; GISEL-NEXT: s_mov_b32 s53, s12 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] -; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: s_mov_b64 s[46:47], exec +; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7] +; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5] +; GISEL-NEXT: s_mov_b64 s[62:63], exec ; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[64:65], vcc ; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b -; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] -; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] +; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49] +; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47] ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s45 -; GISEL-NEXT: s_mov_b32 s13, s44 -; GISEL-NEXT: s_mov_b32 s14, s43 -; GISEL-NEXT: s_mov_b32 s15, s42 +; GISEL-NEXT: s_mov_b32 s12, s53 +; GISEL-NEXT: s_mov_b32 s13, s52 +; GISEL-NEXT: s_mov_b32 s14, s51 +; GISEL-NEXT: s_mov_b32 s15, s50 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: ; implicit-def: $vgpr31 -; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] +; GISEL-NEXT: s_xor_b64 exec, exec, s[64:65] ; GISEL-NEXT: s_cbranch_execnz .LBB3_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: s_mov_b64 exec, s[62:63] +; GISEL-NEXT: v_readlane_b32 s65, v40, 17 +; GISEL-NEXT: v_readlane_b32 s64, v40, 16 +; GISEL-NEXT: v_readlane_b32 s63, v40, 15 +; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s53, v40, 13 +; GISEL-NEXT: v_readlane_b32 s52, v40, 12 +; GISEL-NEXT: v_readlane_b32 s51, v40, 11 +; GISEL-NEXT: v_readlane_b32 s50, v40, 10 +; GISEL-NEXT: v_readlane_b32 s49, v40, 9 +; GISEL-NEXT: v_readlane_b32 s48, v40, 8 +; GISEL-NEXT: v_readlane_b32 s47, v40, 7 +; GISEL-NEXT: v_readlane_b32 s46, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 @@ -482,61 +482,61 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 +; GCN-NEXT: v_writelane_b32 v40, s46, 6 +; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s48, 8 +; GCN-NEXT: v_writelane_b32 v40, s49, 9 +; GCN-NEXT: v_writelane_b32 v40, s50, 10 +; GCN-NEXT: v_writelane_b32 v40, s51, 11 +; GCN-NEXT: v_writelane_b32 v40, s52, 12 +; GCN-NEXT: v_writelane_b32 v40, s53, 13 +; GCN-NEXT: v_writelane_b32 v40, s62, 14 +; GCN-NEXT: v_writelane_b32 v40, s63, 15 +; GCN-NEXT: v_writelane_b32 v40, s64, 16 +; GCN-NEXT: v_writelane_b32 v40, s65, 17 +; GCN-NEXT: s_mov_b32 s50, s15 +; GCN-NEXT: s_mov_b32 s51, s14 +; GCN-NEXT: s_mov_b32 s52, s13 +; GCN-NEXT: s_mov_b32 s53, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] -; GCN-NEXT: s_mov_b64 s[46:47], exec +; GCN-NEXT: s_mov_b64 s[46:47], s[6:7] +; GCN-NEXT: s_mov_b64 s[48:49], s[4:5] +; GCN-NEXT: s_mov_b64 s[62:63], exec ; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s16, v0 ; GCN-NEXT: v_readfirstlane_b32 s17, v1 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN-NEXT: s_and_saveexec_b64 s[64:65], vcc +; GCN-NEXT: s_mov_b64 s[4:5], s[48:49] +; GCN-NEXT: s_mov_b64 s[6:7], s[46:47] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 +; GCN-NEXT: s_mov_b32 s12, s53 +; GCN-NEXT: s_mov_b32 s13, s52 +; GCN-NEXT: s_mov_b32 s14, s51 +; GCN-NEXT: s_mov_b32 s15, s50 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: v_mov_b32_e32 v2, v0 ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GCN-NEXT: ; implicit-def: $vgpr31 -; GCN-NEXT: s_xor_b64 exec, exec, s[48:49] +; GCN-NEXT: s_xor_b64 exec, exec, s[64:65] ; GCN-NEXT: s_cbranch_execnz .LBB4_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[46:47] +; GCN-NEXT: s_mov_b64 exec, s[62:63] ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s65, v40, 17 +; GCN-NEXT: v_readlane_b32 s64, v40, 16 +; GCN-NEXT: v_readlane_b32 s63, v40, 15 +; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s53, v40, 13 +; GCN-NEXT: v_readlane_b32 s52, v40, 12 +; GCN-NEXT: v_readlane_b32 s51, v40, 11 +; GCN-NEXT: v_readlane_b32 s50, v40, 10 +; GCN-NEXT: v_readlane_b32 s49, v40, 9 +; GCN-NEXT: v_readlane_b32 s48, v40, 8 +; GCN-NEXT: v_readlane_b32 s47, v40, 7 +; GCN-NEXT: v_readlane_b32 s46, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -568,61 +568,61 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: s_mov_b32 s42, s15 -; GISEL-NEXT: s_mov_b32 s43, s14 -; GISEL-NEXT: s_mov_b32 s44, s13 -; GISEL-NEXT: s_mov_b32 s45, s12 +; GISEL-NEXT: v_writelane_b32 v40, s46, 6 +; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s48, 8 +; GISEL-NEXT: v_writelane_b32 v40, s49, 9 +; GISEL-NEXT: v_writelane_b32 v40, s50, 10 +; GISEL-NEXT: v_writelane_b32 v40, s51, 11 +; GISEL-NEXT: v_writelane_b32 v40, s52, 12 +; GISEL-NEXT: v_writelane_b32 v40, s53, 13 +; GISEL-NEXT: v_writelane_b32 v40, s62, 14 +; GISEL-NEXT: v_writelane_b32 v40, s63, 15 +; GISEL-NEXT: v_writelane_b32 v40, s64, 16 +; GISEL-NEXT: v_writelane_b32 v40, s65, 17 +; GISEL-NEXT: s_mov_b32 s50, s15 +; GISEL-NEXT: s_mov_b32 s51, s14 +; GISEL-NEXT: s_mov_b32 s52, s13 +; GISEL-NEXT: s_mov_b32 s53, s12 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] -; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: s_mov_b64 s[46:47], exec +; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7] +; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5] +; GISEL-NEXT: s_mov_b64 s[62:63], exec ; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] -; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] +; GISEL-NEXT: s_and_saveexec_b64 s[64:65], vcc +; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49] +; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47] ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s45 -; GISEL-NEXT: s_mov_b32 s13, s44 -; GISEL-NEXT: s_mov_b32 s14, s43 -; GISEL-NEXT: s_mov_b32 s15, s42 +; GISEL-NEXT: s_mov_b32 s12, s53 +; GISEL-NEXT: s_mov_b32 s13, s52 +; GISEL-NEXT: s_mov_b32 s14, s51 +; GISEL-NEXT: s_mov_b32 s15, s50 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GISEL-NEXT: v_mov_b32_e32 v1, v0 ; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: ; implicit-def: $vgpr31 -; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49] +; GISEL-NEXT: s_xor_b64 exec, exec, s[64:65] ; GISEL-NEXT: s_cbranch_execnz .LBB4_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[46:47] +; GISEL-NEXT: s_mov_b64 exec, s[62:63] ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s65, v40, 17 +; GISEL-NEXT: v_readlane_b32 s64, v40, 16 +; GISEL-NEXT: v_readlane_b32 s63, v40, 15 +; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s53, v40, 13 +; GISEL-NEXT: v_readlane_b32 s52, v40, 12 +; GISEL-NEXT: v_readlane_b32 s51, v40, 11 +; GISEL-NEXT: v_readlane_b32 s50, v40, 10 +; GISEL-NEXT: v_readlane_b32 s49, v40, 9 +; GISEL-NEXT: v_readlane_b32 s48, v40, 8 +; GISEL-NEXT: v_readlane_b32 s47, v40, 7 +; GISEL-NEXT: v_readlane_b32 s46, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 @@ -659,70 +659,70 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 +; GCN-NEXT: v_writelane_b32 v40, s46, 6 +; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s48, 8 +; GCN-NEXT: v_writelane_b32 v40, s49, 9 +; GCN-NEXT: v_writelane_b32 v40, s50, 10 +; GCN-NEXT: v_writelane_b32 v40, s51, 11 +; GCN-NEXT: v_writelane_b32 v40, s52, 12 +; GCN-NEXT: v_writelane_b32 v40, s53, 13 +; GCN-NEXT: v_writelane_b32 v40, s62, 14 +; GCN-NEXT: v_writelane_b32 v40, s63, 15 +; GCN-NEXT: v_writelane_b32 v40, s64, 16 +; GCN-NEXT: v_writelane_b32 v40, s65, 17 +; GCN-NEXT: v_writelane_b32 v40, s66, 18 +; GCN-NEXT: v_writelane_b32 v40, s67, 19 +; GCN-NEXT: s_mov_b32 s50, s15 +; GCN-NEXT: s_mov_b32 s51, s14 +; GCN-NEXT: s_mov_b32 s52, s13 +; GCN-NEXT: s_mov_b32 s53, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] +; GCN-NEXT: s_mov_b64 s[46:47], s[6:7] +; GCN-NEXT: s_mov_b64 s[48:49], s[4:5] ; GCN-NEXT: v_and_b32_e32 v2, 1, v2 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc +; GCN-NEXT: s_and_saveexec_b64 s[62:63], vcc ; GCN-NEXT: s_cbranch_execz .LBB5_4 ; GCN-NEXT: ; %bb.1: ; %bb1 -; GCN-NEXT: s_mov_b64 s[48:49], exec +; GCN-NEXT: s_mov_b64 s[64:65], exec ; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s16, v0 ; GCN-NEXT: v_readfirstlane_b32 s17, v1 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[50:51], vcc -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN-NEXT: s_and_saveexec_b64 s[66:67], vcc +; GCN-NEXT: s_mov_b64 s[4:5], s[48:49] +; GCN-NEXT: s_mov_b64 s[6:7], s[46:47] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 +; GCN-NEXT: s_mov_b32 s12, s53 +; GCN-NEXT: s_mov_b32 s13, s52 +; GCN-NEXT: s_mov_b32 s14, s51 +; GCN-NEXT: s_mov_b32 s15, s50 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GCN-NEXT: ; implicit-def: $vgpr31 -; GCN-NEXT: s_xor_b64 exec, exec, s[50:51] +; GCN-NEXT: s_xor_b64 exec, exec, s[66:67] ; GCN-NEXT: s_cbranch_execnz .LBB5_2 ; GCN-NEXT: ; %bb.3: -; GCN-NEXT: s_mov_b64 exec, s[48:49] +; GCN-NEXT: s_mov_b64 exec, s[64:65] ; GCN-NEXT: .LBB5_4: ; %bb2 -; GCN-NEXT: s_or_b64 exec, exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: s_or_b64 exec, exec, s[62:63] +; GCN-NEXT: v_readlane_b32 s67, v40, 19 +; GCN-NEXT: v_readlane_b32 s66, v40, 18 +; GCN-NEXT: v_readlane_b32 s65, v40, 17 +; GCN-NEXT: v_readlane_b32 s64, v40, 16 +; GCN-NEXT: v_readlane_b32 s63, v40, 15 +; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s53, v40, 13 +; GCN-NEXT: v_readlane_b32 s52, v40, 12 +; GCN-NEXT: v_readlane_b32 s51, v40, 11 +; GCN-NEXT: v_readlane_b32 s50, v40, 10 +; GCN-NEXT: v_readlane_b32 s49, v40, 9 +; GCN-NEXT: v_readlane_b32 s48, v40, 8 +; GCN-NEXT: v_readlane_b32 s47, v40, 7 +; GCN-NEXT: v_readlane_b32 s46, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -754,70 +754,70 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: s_mov_b32 s42, s15 -; GISEL-NEXT: s_mov_b32 s43, s14 -; GISEL-NEXT: s_mov_b32 s44, s13 -; GISEL-NEXT: s_mov_b32 s45, s12 +; GISEL-NEXT: v_writelane_b32 v40, s46, 6 +; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s48, 8 +; GISEL-NEXT: v_writelane_b32 v40, s49, 9 +; GISEL-NEXT: v_writelane_b32 v40, s50, 10 +; GISEL-NEXT: v_writelane_b32 v40, s51, 11 +; GISEL-NEXT: v_writelane_b32 v40, s52, 12 +; GISEL-NEXT: v_writelane_b32 v40, s53, 13 +; GISEL-NEXT: v_writelane_b32 v40, s62, 14 +; GISEL-NEXT: v_writelane_b32 v40, s63, 15 +; GISEL-NEXT: v_writelane_b32 v40, s64, 16 +; GISEL-NEXT: v_writelane_b32 v40, s65, 17 +; GISEL-NEXT: v_writelane_b32 v40, s66, 18 +; GISEL-NEXT: v_writelane_b32 v40, s67, 19 +; GISEL-NEXT: s_mov_b32 s50, s15 +; GISEL-NEXT: s_mov_b32 s51, s14 +; GISEL-NEXT: s_mov_b32 s52, s13 +; GISEL-NEXT: s_mov_b32 s53, s12 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] -; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] +; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7] +; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5] ; GISEL-NEXT: v_and_b32_e32 v2, 1, v2 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[62:63], vcc ; GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GISEL-NEXT: ; %bb.1: ; %bb1 -; GISEL-NEXT: s_mov_b64 s[48:49], exec +; GISEL-NEXT: s_mov_b64 s[64:65], exec ; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[50:51], vcc -; GISEL-NEXT: s_mov_b64 s[4:5], s[40:41] -; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] +; GISEL-NEXT: s_and_saveexec_b64 s[66:67], vcc +; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49] +; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47] ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] -; GISEL-NEXT: s_mov_b32 s12, s45 -; GISEL-NEXT: s_mov_b32 s13, s44 -; GISEL-NEXT: s_mov_b32 s14, s43 -; GISEL-NEXT: s_mov_b32 s15, s42 +; GISEL-NEXT: s_mov_b32 s12, s53 +; GISEL-NEXT: s_mov_b32 s13, s52 +; GISEL-NEXT: s_mov_b32 s14, s51 +; GISEL-NEXT: s_mov_b32 s15, s50 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GISEL-NEXT: ; implicit-def: $vgpr0 ; GISEL-NEXT: ; implicit-def: $vgpr31 -; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51] +; GISEL-NEXT: s_xor_b64 exec, exec, s[66:67] ; GISEL-NEXT: s_cbranch_execnz .LBB5_2 ; GISEL-NEXT: ; %bb.3: -; GISEL-NEXT: s_mov_b64 exec, s[48:49] +; GISEL-NEXT: s_mov_b64 exec, s[64:65] ; GISEL-NEXT: .LBB5_4: ; %bb2 -; GISEL-NEXT: s_or_b64 exec, exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: s_or_b64 exec, exec, s[62:63] +; GISEL-NEXT: v_readlane_b32 s67, v40, 19 +; GISEL-NEXT: v_readlane_b32 s66, v40, 18 +; GISEL-NEXT: v_readlane_b32 s65, v40, 17 +; GISEL-NEXT: v_readlane_b32 s64, v40, 16 +; GISEL-NEXT: v_readlane_b32 s63, v40, 15 +; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s53, v40, 13 +; GISEL-NEXT: v_readlane_b32 s52, v40, 12 +; GISEL-NEXT: v_readlane_b32 s51, v40, 11 +; GISEL-NEXT: v_readlane_b32 s50, v40, 10 +; GISEL-NEXT: v_readlane_b32 s49, v40, 9 +; GISEL-NEXT: v_readlane_b32 s48, v40, 8 +; GISEL-NEXT: v_readlane_b32 s47, v40, 7 +; GISEL-NEXT: v_readlane_b32 s46, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 @@ -859,32 +859,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 +; GCN-NEXT: v_writelane_b32 v40, s46, 6 +; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s48, 8 +; GCN-NEXT: v_writelane_b32 v40, s49, 9 +; GCN-NEXT: v_writelane_b32 v40, s50, 10 +; GCN-NEXT: v_writelane_b32 v40, s51, 11 +; GCN-NEXT: v_writelane_b32 v40, s52, 12 +; GCN-NEXT: v_writelane_b32 v40, s53, 13 +; GCN-NEXT: v_writelane_b32 v40, s62, 14 +; GCN-NEXT: v_writelane_b32 v40, s63, 15 ; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s8, v0 @@ -898,32 +882,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB6_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s63, v40, 15 +; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s53, v40, 13 +; GCN-NEXT: v_readlane_b32 s52, v40, 12 +; GCN-NEXT: v_readlane_b32 s51, v40, 11 +; GCN-NEXT: v_readlane_b32 s50, v40, 10 +; GCN-NEXT: v_readlane_b32 s49, v40, 9 +; GCN-NEXT: v_readlane_b32 s48, v40, 8 +; GCN-NEXT: v_readlane_b32 s47, v40, 7 +; GCN-NEXT: v_readlane_b32 s46, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -953,32 +921,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL-NEXT: v_writelane_b32 v40, s46, 6 +; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s48, 8 +; GISEL-NEXT: v_writelane_b32 v40, s49, 9 +; GISEL-NEXT: v_writelane_b32 v40, s50, 10 +; GISEL-NEXT: v_writelane_b32 v40, s51, 11 +; GISEL-NEXT: v_writelane_b32 v40, s52, 12 +; GISEL-NEXT: v_writelane_b32 v40, s53, 13 +; GISEL-NEXT: v_writelane_b32 v40, s62, 14 +; GISEL-NEXT: v_writelane_b32 v40, s63, 15 ; GISEL-NEXT: s_mov_b64 s[6:7], exec ; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s8, v0 @@ -992,32 +944,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB6_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s63, v40, 15 +; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s53, v40, 13 +; GISEL-NEXT: v_readlane_b32 s52, v40, 12 +; GISEL-NEXT: v_readlane_b32 s51, v40, 11 +; GISEL-NEXT: v_readlane_b32 s50, v40, 10 +; GISEL-NEXT: v_readlane_b32 s49, v40, 9 +; GISEL-NEXT: v_readlane_b32 s48, v40, 8 +; GISEL-NEXT: v_readlane_b32 s47, v40, 7 +; GISEL-NEXT: v_readlane_b32 s46, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 @@ -1052,32 +988,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v41, s35, 3 ; GCN-NEXT: v_writelane_b32 v41, s36, 4 ; GCN-NEXT: v_writelane_b32 v41, s37, 5 -; GCN-NEXT: v_writelane_b32 v41, s38, 6 -; GCN-NEXT: v_writelane_b32 v41, s39, 7 -; GCN-NEXT: v_writelane_b32 v41, s40, 8 -; GCN-NEXT: v_writelane_b32 v41, s41, 9 -; GCN-NEXT: v_writelane_b32 v41, s42, 10 -; GCN-NEXT: v_writelane_b32 v41, s43, 11 -; GCN-NEXT: v_writelane_b32 v41, s44, 12 -; GCN-NEXT: v_writelane_b32 v41, s45, 13 -; GCN-NEXT: v_writelane_b32 v41, s46, 14 -; GCN-NEXT: v_writelane_b32 v41, s47, 15 -; GCN-NEXT: v_writelane_b32 v41, s48, 16 -; GCN-NEXT: v_writelane_b32 v41, s49, 17 -; GCN-NEXT: v_writelane_b32 v41, s50, 18 -; GCN-NEXT: v_writelane_b32 v41, s51, 19 -; GCN-NEXT: v_writelane_b32 v41, s52, 20 -; GCN-NEXT: v_writelane_b32 v41, s53, 21 -; GCN-NEXT: v_writelane_b32 v41, s54, 22 -; GCN-NEXT: v_writelane_b32 v41, s55, 23 -; GCN-NEXT: v_writelane_b32 v41, s56, 24 -; GCN-NEXT: v_writelane_b32 v41, s57, 25 -; GCN-NEXT: v_writelane_b32 v41, s58, 26 -; GCN-NEXT: v_writelane_b32 v41, s59, 27 -; GCN-NEXT: v_writelane_b32 v41, s60, 28 -; GCN-NEXT: v_writelane_b32 v41, s61, 29 -; GCN-NEXT: v_writelane_b32 v41, s62, 30 -; GCN-NEXT: v_writelane_b32 v41, s63, 31 +; GCN-NEXT: v_writelane_b32 v41, s46, 6 +; GCN-NEXT: v_writelane_b32 v41, s47, 7 +; GCN-NEXT: v_writelane_b32 v41, s48, 8 +; GCN-NEXT: v_writelane_b32 v41, s49, 9 +; GCN-NEXT: v_writelane_b32 v41, s50, 10 +; GCN-NEXT: v_writelane_b32 v41, s51, 11 +; GCN-NEXT: v_writelane_b32 v41, s52, 12 +; GCN-NEXT: v_writelane_b32 v41, s53, 13 +; GCN-NEXT: v_writelane_b32 v41, s62, 14 +; GCN-NEXT: v_writelane_b32 v41, s63, 15 ; GCN-NEXT: v_mov_b32_e32 v40, v0 ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 @@ -1093,32 +1013,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v40 -; GCN-NEXT: v_readlane_b32 s63, v41, 31 -; GCN-NEXT: v_readlane_b32 s62, v41, 30 -; GCN-NEXT: v_readlane_b32 s61, v41, 29 -; GCN-NEXT: v_readlane_b32 s60, v41, 28 -; GCN-NEXT: v_readlane_b32 s59, v41, 27 -; GCN-NEXT: v_readlane_b32 s58, v41, 26 -; GCN-NEXT: v_readlane_b32 s57, v41, 25 -; GCN-NEXT: v_readlane_b32 s56, v41, 24 -; GCN-NEXT: v_readlane_b32 s55, v41, 23 -; GCN-NEXT: v_readlane_b32 s54, v41, 22 -; GCN-NEXT: v_readlane_b32 s53, v41, 21 -; GCN-NEXT: v_readlane_b32 s52, v41, 20 -; GCN-NEXT: v_readlane_b32 s51, v41, 19 -; GCN-NEXT: v_readlane_b32 s50, v41, 18 -; GCN-NEXT: v_readlane_b32 s49, v41, 17 -; GCN-NEXT: v_readlane_b32 s48, v41, 16 -; GCN-NEXT: v_readlane_b32 s47, v41, 15 -; GCN-NEXT: v_readlane_b32 s46, v41, 14 -; GCN-NEXT: v_readlane_b32 s45, v41, 13 -; GCN-NEXT: v_readlane_b32 s44, v41, 12 -; GCN-NEXT: v_readlane_b32 s43, v41, 11 -; GCN-NEXT: v_readlane_b32 s42, v41, 10 -; GCN-NEXT: v_readlane_b32 s41, v41, 9 -; GCN-NEXT: v_readlane_b32 s40, v41, 8 -; GCN-NEXT: v_readlane_b32 s39, v41, 7 -; GCN-NEXT: v_readlane_b32 s38, v41, 6 +; GCN-NEXT: v_readlane_b32 s63, v41, 15 +; GCN-NEXT: v_readlane_b32 s62, v41, 14 +; GCN-NEXT: v_readlane_b32 s53, v41, 13 +; GCN-NEXT: v_readlane_b32 s52, v41, 12 +; GCN-NEXT: v_readlane_b32 s51, v41, 11 +; GCN-NEXT: v_readlane_b32 s50, v41, 10 +; GCN-NEXT: v_readlane_b32 s49, v41, 9 +; GCN-NEXT: v_readlane_b32 s48, v41, 8 +; GCN-NEXT: v_readlane_b32 s47, v41, 7 +; GCN-NEXT: v_readlane_b32 s46, v41, 6 ; GCN-NEXT: v_readlane_b32 s37, v41, 5 ; GCN-NEXT: v_readlane_b32 s36, v41, 4 ; GCN-NEXT: v_readlane_b32 s35, v41, 3 @@ -1150,32 +1054,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v41, s35, 3 ; GISEL-NEXT: v_writelane_b32 v41, s36, 4 ; GISEL-NEXT: v_writelane_b32 v41, s37, 5 -; GISEL-NEXT: v_writelane_b32 v41, s38, 6 -; GISEL-NEXT: v_writelane_b32 v41, s39, 7 -; GISEL-NEXT: v_writelane_b32 v41, s40, 8 -; GISEL-NEXT: v_writelane_b32 v41, s41, 9 -; GISEL-NEXT: v_writelane_b32 v41, s42, 10 -; GISEL-NEXT: v_writelane_b32 v41, s43, 11 -; GISEL-NEXT: v_writelane_b32 v41, s44, 12 -; GISEL-NEXT: v_writelane_b32 v41, s45, 13 -; GISEL-NEXT: v_writelane_b32 v41, s46, 14 -; GISEL-NEXT: v_writelane_b32 v41, s47, 15 -; GISEL-NEXT: v_writelane_b32 v41, s48, 16 -; GISEL-NEXT: v_writelane_b32 v41, s49, 17 -; GISEL-NEXT: v_writelane_b32 v41, s50, 18 -; GISEL-NEXT: v_writelane_b32 v41, s51, 19 -; GISEL-NEXT: v_writelane_b32 v41, s52, 20 -; GISEL-NEXT: v_writelane_b32 v41, s53, 21 -; GISEL-NEXT: v_writelane_b32 v41, s54, 22 -; GISEL-NEXT: v_writelane_b32 v41, s55, 23 -; GISEL-NEXT: v_writelane_b32 v41, s56, 24 -; GISEL-NEXT: v_writelane_b32 v41, s57, 25 -; GISEL-NEXT: v_writelane_b32 v41, s58, 26 -; GISEL-NEXT: v_writelane_b32 v41, s59, 27 -; GISEL-NEXT: v_writelane_b32 v41, s60, 28 -; GISEL-NEXT: v_writelane_b32 v41, s61, 29 -; GISEL-NEXT: v_writelane_b32 v41, s62, 30 -; GISEL-NEXT: v_writelane_b32 v41, s63, 31 +; GISEL-NEXT: v_writelane_b32 v41, s46, 6 +; GISEL-NEXT: v_writelane_b32 v41, s47, 7 +; GISEL-NEXT: v_writelane_b32 v41, s48, 8 +; GISEL-NEXT: v_writelane_b32 v41, s49, 9 +; GISEL-NEXT: v_writelane_b32 v41, s50, 10 +; GISEL-NEXT: v_writelane_b32 v41, s51, 11 +; GISEL-NEXT: v_writelane_b32 v41, s52, 12 +; GISEL-NEXT: v_writelane_b32 v41, s53, 13 +; GISEL-NEXT: v_writelane_b32 v41, s62, 14 +; GISEL-NEXT: v_writelane_b32 v41, s63, 15 ; GISEL-NEXT: v_mov_b32_e32 v40, v0 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 @@ -1191,32 +1079,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v40 -; GISEL-NEXT: v_readlane_b32 s63, v41, 31 -; GISEL-NEXT: v_readlane_b32 s62, v41, 30 -; GISEL-NEXT: v_readlane_b32 s61, v41, 29 -; GISEL-NEXT: v_readlane_b32 s60, v41, 28 -; GISEL-NEXT: v_readlane_b32 s59, v41, 27 -; GISEL-NEXT: v_readlane_b32 s58, v41, 26 -; GISEL-NEXT: v_readlane_b32 s57, v41, 25 -; GISEL-NEXT: v_readlane_b32 s56, v41, 24 -; GISEL-NEXT: v_readlane_b32 s55, v41, 23 -; GISEL-NEXT: v_readlane_b32 s54, v41, 22 -; GISEL-NEXT: v_readlane_b32 s53, v41, 21 -; GISEL-NEXT: v_readlane_b32 s52, v41, 20 -; GISEL-NEXT: v_readlane_b32 s51, v41, 19 -; GISEL-NEXT: v_readlane_b32 s50, v41, 18 -; GISEL-NEXT: v_readlane_b32 s49, v41, 17 -; GISEL-NEXT: v_readlane_b32 s48, v41, 16 -; GISEL-NEXT: v_readlane_b32 s47, v41, 15 -; GISEL-NEXT: v_readlane_b32 s46, v41, 14 -; GISEL-NEXT: v_readlane_b32 s45, v41, 13 -; GISEL-NEXT: v_readlane_b32 s44, v41, 12 -; GISEL-NEXT: v_readlane_b32 s43, v41, 11 -; GISEL-NEXT: v_readlane_b32 s42, v41, 10 -; GISEL-NEXT: v_readlane_b32 s41, v41, 9 -; GISEL-NEXT: v_readlane_b32 s40, v41, 8 -; GISEL-NEXT: v_readlane_b32 s39, v41, 7 -; GISEL-NEXT: v_readlane_b32 s38, v41, 6 +; GISEL-NEXT: v_readlane_b32 s63, v41, 15 +; GISEL-NEXT: v_readlane_b32 s62, v41, 14 +; GISEL-NEXT: v_readlane_b32 s53, v41, 13 +; GISEL-NEXT: v_readlane_b32 s52, v41, 12 +; GISEL-NEXT: v_readlane_b32 s51, v41, 11 +; GISEL-NEXT: v_readlane_b32 s50, v41, 10 +; GISEL-NEXT: v_readlane_b32 s49, v41, 9 +; GISEL-NEXT: v_readlane_b32 s48, v41, 8 +; GISEL-NEXT: v_readlane_b32 s47, v41, 7 +; GISEL-NEXT: v_readlane_b32 s46, v41, 6 ; GISEL-NEXT: v_readlane_b32 s37, v41, 5 ; GISEL-NEXT: v_readlane_b32 s36, v41, 4 ; GISEL-NEXT: v_readlane_b32 s35, v41, 3 @@ -1255,32 +1127,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 +; GCN-NEXT: v_writelane_b32 v40, s46, 6 +; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s48, 8 +; GCN-NEXT: v_writelane_b32 v40, s49, 9 +; GCN-NEXT: v_writelane_b32 v40, s50, 10 +; GCN-NEXT: v_writelane_b32 v40, s51, 11 +; GCN-NEXT: v_writelane_b32 v40, s52, 12 +; GCN-NEXT: v_writelane_b32 v40, s53, 13 +; GCN-NEXT: v_writelane_b32 v40, s62, 14 +; GCN-NEXT: v_writelane_b32 v40, s63, 15 ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s8, v1 @@ -1296,32 +1152,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v3 -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s63, v40, 15 +; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s53, v40, 13 +; GCN-NEXT: v_readlane_b32 s52, v40, 12 +; GCN-NEXT: v_readlane_b32 s51, v40, 11 +; GCN-NEXT: v_readlane_b32 s50, v40, 10 +; GCN-NEXT: v_readlane_b32 s49, v40, 9 +; GCN-NEXT: v_readlane_b32 s48, v40, 8 +; GCN-NEXT: v_readlane_b32 s47, v40, 7 +; GCN-NEXT: v_readlane_b32 s46, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -1351,32 +1191,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL-NEXT: v_writelane_b32 v40, s46, 6 +; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s48, 8 +; GISEL-NEXT: v_writelane_b32 v40, s49, 9 +; GISEL-NEXT: v_writelane_b32 v40, s50, 10 +; GISEL-NEXT: v_writelane_b32 v40, s51, 11 +; GISEL-NEXT: v_writelane_b32 v40, s52, 12 +; GISEL-NEXT: v_writelane_b32 v40, s53, 13 +; GISEL-NEXT: v_writelane_b32 v40, s62, 14 +; GISEL-NEXT: v_writelane_b32 v40, s63, 15 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s8, v1 @@ -1392,32 +1216,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v2 -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s63, v40, 15 +; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s53, v40, 13 +; GISEL-NEXT: v_readlane_b32 s52, v40, 12 +; GISEL-NEXT: v_readlane_b32 s51, v40, 11 +; GISEL-NEXT: v_readlane_b32 s50, v40, 10 +; GISEL-NEXT: v_readlane_b32 s49, v40, 9 +; GISEL-NEXT: v_readlane_b32 s48, v40, 8 +; GISEL-NEXT: v_readlane_b32 s47, v40, 7 +; GISEL-NEXT: v_readlane_b32 s46, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 @@ -1452,32 +1260,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 +; GCN-NEXT: v_writelane_b32 v40, s46, 6 +; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s48, 8 +; GCN-NEXT: v_writelane_b32 v40, s49, 9 +; GCN-NEXT: v_writelane_b32 v40, s50, 10 +; GCN-NEXT: v_writelane_b32 v40, s51, 11 +; GCN-NEXT: v_writelane_b32 v40, s52, 12 +; GCN-NEXT: v_writelane_b32 v40, s53, 13 +; GCN-NEXT: v_writelane_b32 v40, s62, 14 +; GCN-NEXT: v_writelane_b32 v40, s63, 15 ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s6, v0 @@ -1490,32 +1282,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB9_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s63, v40, 15 +; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s53, v40, 13 +; GCN-NEXT: v_readlane_b32 s52, v40, 12 +; GCN-NEXT: v_readlane_b32 s51, v40, 11 +; GCN-NEXT: v_readlane_b32 s50, v40, 10 +; GCN-NEXT: v_readlane_b32 s49, v40, 9 +; GCN-NEXT: v_readlane_b32 s48, v40, 8 +; GCN-NEXT: v_readlane_b32 s47, v40, 7 +; GCN-NEXT: v_readlane_b32 s46, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -1545,32 +1321,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL-NEXT: v_writelane_b32 v40, s46, 6 +; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s48, 8 +; GISEL-NEXT: v_writelane_b32 v40, s49, 9 +; GISEL-NEXT: v_writelane_b32 v40, s50, 10 +; GISEL-NEXT: v_writelane_b32 v40, s51, 11 +; GISEL-NEXT: v_writelane_b32 v40, s52, 12 +; GISEL-NEXT: v_writelane_b32 v40, s53, 13 +; GISEL-NEXT: v_writelane_b32 v40, s62, 14 +; GISEL-NEXT: v_writelane_b32 v40, s63, 15 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s6, v0 @@ -1583,32 +1343,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB9_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s63, v40, 15 +; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s53, v40, 13 +; GISEL-NEXT: v_readlane_b32 s52, v40, 12 +; GISEL-NEXT: v_readlane_b32 s51, v40, 11 +; GISEL-NEXT: v_readlane_b32 s50, v40, 10 +; GISEL-NEXT: v_readlane_b32 s49, v40, 9 +; GISEL-NEXT: v_readlane_b32 s48, v40, 8 +; GISEL-NEXT: v_readlane_b32 s47, v40, 7 +; GISEL-NEXT: v_readlane_b32 s46, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 diff --git a/llvm/test/CodeGen/AMDGPU/issue48473.mir b/llvm/test/CodeGen/AMDGPU/issue48473.mir index dd73e65de7cb6..55de5dd133700 100644 --- a/llvm/test/CodeGen/AMDGPU/issue48473.mir +++ b/llvm/test/CodeGen/AMDGPU/issue48473.mir @@ -43,7 +43,7 @@ # %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 # CHECK-LABEL: name: issue48473 -# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 +# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 --- name: issue48473 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll index dbe95a8091932..4fd9fc95b8532 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll @@ -113,20 +113,20 @@ exit: define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) { ; GFX9-SDAG-LABEL: test_call: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-SDAG-NEXT: s_mov_b32 s38, -1 -; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s2 -; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-SDAG-NEXT: s_mov_b32 s50, -1 +; GFX9-SDAG-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-SDAG-NEXT: s_add_u32 s48, s48, s2 +; GFX9-SDAG-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-SDAG-NEXT: s_getpc_b64 s[0:1] ; GFX9-SDAG-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4 ; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9-SDAG-NEXT: s_mov_b32 s6, src_pops_exiting_wave_id -; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-SDAG-NEXT: s_mov_b64 s[8:9], 36 -; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-SDAG-NEXT: s_mov_b32 s32, 0 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -135,20 +135,20 @@ define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) { ; ; GFX9-GISEL-LABEL: test_call: ; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-GISEL-NEXT: s_mov_b32 s38, -1 -; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s2 -; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-GISEL-NEXT: s_mov_b32 s50, -1 +; GFX9-GISEL-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-GISEL-NEXT: s_add_u32 s48, s48, s2 +; GFX9-GISEL-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1] ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-GISEL-NEXT: s_mov_b64 s[8:9], 36 ; GFX9-GISEL-NEXT: s_mov_b32 s32, 0 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -157,12 +157,12 @@ define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) { ; ; GFX10-LABEL: test_call: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s38, -1 -; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX10-NEXT: s_add_u32 s36, s36, s2 -; GFX10-NEXT: s_addc_u32 s37, s37, 0 +; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s50, -1 +; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX10-NEXT: s_add_u32 s48, s48, s2 +; GFX10-NEXT: s_addc_u32 s49, s49, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12 @@ -171,8 +171,8 @@ define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) { ; GFX10-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll index 584dd2700c419..c6a412a9f88b0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll @@ -1727,14 +1727,9 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX7-LABEL: v_maximum_v16f32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX7-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX7-NEXT: s_mov_b64 exec, s[4:5] ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 ; GFX7-NEXT: v_max_f32_e32 v1, v1, v17 ; GFX7-NEXT: buffer_load_dword v17, off, s[0:3], s32 -; GFX7-NEXT: v_writelane_b32 v31, s30, 0 -; GFX7-NEXT: v_writelane_b32 v31, s31, 1 ; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18 ; GFX7-NEXT: v_max_f32_e32 v2, v2, v18 ; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19 @@ -1743,7 +1738,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX7-NEXT: v_max_f32_e32 v19, v0, v16 ; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 ; GFX7-NEXT: v_max_f32_e32 v16, v14, v30 -; GFX7-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30 +; GFX7-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30 ; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 ; GFX7-NEXT: v_max_f32_e32 v4, v4, v20 ; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 @@ -1765,7 +1760,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 ; GFX7-NEXT: v_max_f32_e32 v13, v13, v29 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31] +; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39] ; GFX7-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] ; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] @@ -1779,29 +1774,18 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX7-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23] ; GFX7-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25] ; GFX7-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27] -; GFX7-NEXT: v_readlane_b32 s31, v31, 1 -; GFX7-NEXT: v_readlane_b32 s30, v31, 0 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_max_f32_e32 v16, v15, v17 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v17 ; GFX7-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc -; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX7-NEXT: s_mov_b64 exec, s[4:5] -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_v16f32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 ; GFX8-NEXT: v_max_f32_e32 v1, v1, v17 ; GFX8-NEXT: buffer_load_dword v17, off, s[0:3], s32 -; GFX8-NEXT: v_writelane_b32 v31, s30, 0 -; GFX8-NEXT: v_writelane_b32 v31, s31, 1 ; GFX8-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18 ; GFX8-NEXT: v_max_f32_e32 v2, v2, v18 ; GFX8-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19 @@ -1810,7 +1794,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX8-NEXT: v_max_f32_e32 v19, v0, v16 ; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 ; GFX8-NEXT: v_max_f32_e32 v16, v14, v30 -; GFX8-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30 +; GFX8-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30 ; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 ; GFX8-NEXT: v_max_f32_e32 v4, v4, v20 ; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 @@ -1832,7 +1816,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 ; GFX8-NEXT: v_max_f32_e32 v13, v13, v29 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31] +; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39] ; GFX8-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] ; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] @@ -1846,29 +1830,18 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX8-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23] ; GFX8-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25] ; GFX8-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27] -; GFX8-NEXT: v_readlane_b32 s31, v31, 1 -; GFX8-NEXT: v_readlane_b32 s30, v31, 0 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_max_f32_e32 v16, v15, v17 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v17 ; GFX8-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_v16f32: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 ; GFX900-NEXT: v_max_f32_e32 v1, v1, v17 ; GFX900-NEXT: buffer_load_dword v17, off, s[0:3], s32 -; GFX900-NEXT: v_writelane_b32 v31, s30, 0 -; GFX900-NEXT: v_writelane_b32 v31, s31, 1 ; GFX900-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18 ; GFX900-NEXT: v_max_f32_e32 v2, v2, v18 ; GFX900-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19 @@ -1877,7 +1850,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX900-NEXT: v_max_f32_e32 v19, v0, v16 ; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 ; GFX900-NEXT: v_max_f32_e32 v16, v14, v30 -; GFX900-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30 +; GFX900-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30 ; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 ; GFX900-NEXT: v_max_f32_e32 v4, v4, v20 ; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 @@ -1899,7 +1872,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 ; GFX900-NEXT: v_max_f32_e32 v13, v13, v29 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31] +; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39] ; GFX900-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] ; GFX900-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] ; GFX900-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] @@ -1913,16 +1886,10 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX900-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23] ; GFX900-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25] ; GFX900-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27] -; GFX900-NEXT: v_readlane_b32 s31, v31, 1 -; GFX900-NEXT: v_readlane_b32 s30, v31, 0 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_max_f32_e32 v16, v15, v17 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v15, v17 ; GFX900-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_v16f32: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll index e354ec6fb3dd7..f7ce72efa4373 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll @@ -2008,15 +2008,8 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX7-LABEL: v_maximum_v16f64: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX7-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX7-NEXT: s_mov_b64 exec, s[4:5] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 -; GFX7-NEXT: v_writelane_b32 v34, s30, 0 -; GFX7-NEXT: v_writelane_b32 v34, s31, 1 -; GFX7-NEXT: v_writelane_b32 v34, s34, 2 -; GFX7-NEXT: v_writelane_b32 v34, s35, 3 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32] ; GFX7-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32] @@ -2102,14 +2095,14 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32] +; GFX7-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32] ; GFX7-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31] +; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33] +; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33] ; GFX7-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33] ; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc @@ -2126,31 +2119,16 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] ; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] ; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31] -; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35] -; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35] -; GFX7-NEXT: v_readlane_b32 s35, v34, 3 -; GFX7-NEXT: v_readlane_b32 s34, v34, 2 -; GFX7-NEXT: v_readlane_b32 s31, v34, 1 -; GFX7-NEXT: v_readlane_b32 s30, v34, 0 -; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX7-NEXT: s_mov_b64 exec, s[4:5] -; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39] +; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41] +; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_v16f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 -; GFX8-NEXT: v_writelane_b32 v34, s30, 0 -; GFX8-NEXT: v_writelane_b32 v34, s31, 1 -; GFX8-NEXT: v_writelane_b32 v34, s34, 2 -; GFX8-NEXT: v_writelane_b32 v34, s35, 3 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32] ; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32] @@ -2236,14 +2214,14 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32] +; GFX8-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32] ; GFX8-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31] +; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33] +; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33] ; GFX8-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33] ; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc @@ -2260,31 +2238,16 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] ; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] ; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31] -; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35] -; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35] -; GFX8-NEXT: v_readlane_b32 s35, v34, 3 -; GFX8-NEXT: v_readlane_b32 s34, v34, 2 -; GFX8-NEXT: v_readlane_b32 s31, v34, 1 -; GFX8-NEXT: v_readlane_b32 s30, v34, 0 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39] +; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_v16f64: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 -; GFX900-NEXT: v_writelane_b32 v34, s30, 0 -; GFX900-NEXT: v_writelane_b32 v34, s31, 1 -; GFX900-NEXT: v_writelane_b32 v34, s34, 2 -; GFX900-NEXT: v_writelane_b32 v34, s35, 3 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32] ; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[31:32] @@ -2370,14 +2333,14 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32] +; GFX900-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32] ; GFX900-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32] ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31] +; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33] +; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33] ; GFX900-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33] ; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc @@ -2394,17 +2357,9 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] ; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] ; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31] -; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35] -; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35] -; GFX900-NEXT: v_readlane_b32 s35, v34, 3 -; GFX900-NEXT: v_readlane_b32 s34, v34, 2 -; GFX900-NEXT: v_readlane_b32 s31, v34, 1 -; GFX900-NEXT: v_readlane_b32 s30, v34, 0 -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39] +; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41] +; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_v16f64: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll index 9962433134073..7fe4f9be2727d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll @@ -1727,14 +1727,9 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX7-LABEL: v_minimum_v16f32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX7-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX7-NEXT: s_mov_b64 exec, s[4:5] ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 ; GFX7-NEXT: v_min_f32_e32 v1, v1, v17 ; GFX7-NEXT: buffer_load_dword v17, off, s[0:3], s32 -; GFX7-NEXT: v_writelane_b32 v31, s30, 0 -; GFX7-NEXT: v_writelane_b32 v31, s31, 1 ; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18 ; GFX7-NEXT: v_min_f32_e32 v2, v2, v18 ; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19 @@ -1743,7 +1738,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX7-NEXT: v_min_f32_e32 v19, v0, v16 ; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 ; GFX7-NEXT: v_min_f32_e32 v16, v14, v30 -; GFX7-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30 +; GFX7-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30 ; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 ; GFX7-NEXT: v_min_f32_e32 v4, v4, v20 ; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 @@ -1765,7 +1760,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 ; GFX7-NEXT: v_min_f32_e32 v13, v13, v29 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31] +; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39] ; GFX7-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] ; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] @@ -1779,29 +1774,18 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX7-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23] ; GFX7-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25] ; GFX7-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27] -; GFX7-NEXT: v_readlane_b32 s31, v31, 1 -; GFX7-NEXT: v_readlane_b32 s30, v31, 0 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_min_f32_e32 v16, v15, v17 ; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v17 ; GFX7-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc -; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX7-NEXT: s_mov_b64 exec, s[4:5] -; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_v16f32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 ; GFX8-NEXT: v_min_f32_e32 v1, v1, v17 ; GFX8-NEXT: buffer_load_dword v17, off, s[0:3], s32 -; GFX8-NEXT: v_writelane_b32 v31, s30, 0 -; GFX8-NEXT: v_writelane_b32 v31, s31, 1 ; GFX8-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18 ; GFX8-NEXT: v_min_f32_e32 v2, v2, v18 ; GFX8-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19 @@ -1810,7 +1794,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX8-NEXT: v_min_f32_e32 v19, v0, v16 ; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 ; GFX8-NEXT: v_min_f32_e32 v16, v14, v30 -; GFX8-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30 +; GFX8-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30 ; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 ; GFX8-NEXT: v_min_f32_e32 v4, v4, v20 ; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 @@ -1832,7 +1816,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 ; GFX8-NEXT: v_min_f32_e32 v13, v13, v29 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31] +; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39] ; GFX8-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] ; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] @@ -1846,29 +1830,18 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX8-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23] ; GFX8-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25] ; GFX8-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27] -; GFX8-NEXT: v_readlane_b32 s31, v31, 1 -; GFX8-NEXT: v_readlane_b32 s30, v31, 0 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_min_f32_e32 v16, v15, v17 ; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v17 ; GFX8-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_v16f32: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 ; GFX900-NEXT: v_min_f32_e32 v1, v1, v17 ; GFX900-NEXT: buffer_load_dword v17, off, s[0:3], s32 -; GFX900-NEXT: v_writelane_b32 v31, s30, 0 -; GFX900-NEXT: v_writelane_b32 v31, s31, 1 ; GFX900-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18 ; GFX900-NEXT: v_min_f32_e32 v2, v2, v18 ; GFX900-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19 @@ -1877,7 +1850,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX900-NEXT: v_min_f32_e32 v19, v0, v16 ; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 ; GFX900-NEXT: v_min_f32_e32 v16, v14, v30 -; GFX900-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30 +; GFX900-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30 ; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 ; GFX900-NEXT: v_min_f32_e32 v4, v4, v20 ; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 @@ -1899,7 +1872,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 ; GFX900-NEXT: v_min_f32_e32 v13, v13, v29 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31] +; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39] ; GFX900-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] ; GFX900-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] ; GFX900-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] @@ -1913,16 +1886,10 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX900-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23] ; GFX900-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25] ; GFX900-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27] -; GFX900-NEXT: v_readlane_b32 s31, v31, 1 -; GFX900-NEXT: v_readlane_b32 s30, v31, 0 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_min_f32_e32 v16, v15, v17 ; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v15, v17 ; GFX900-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_v16f32: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll index 71fdd691a1512..ab20fd88091d9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll @@ -2008,15 +2008,8 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX7-LABEL: v_minimum_v16f64: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX7-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX7-NEXT: s_mov_b64 exec, s[4:5] ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 -; GFX7-NEXT: v_writelane_b32 v34, s30, 0 -; GFX7-NEXT: v_writelane_b32 v34, s31, 1 -; GFX7-NEXT: v_writelane_b32 v34, s34, 2 -; GFX7-NEXT: v_writelane_b32 v34, s35, 3 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32] ; GFX7-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32] @@ -2102,14 +2095,14 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32] +; GFX7-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32] ; GFX7-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31] +; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33] +; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33] ; GFX7-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33] ; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc @@ -2126,31 +2119,16 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] ; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] ; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31] -; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35] -; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35] -; GFX7-NEXT: v_readlane_b32 s35, v34, 3 -; GFX7-NEXT: v_readlane_b32 s34, v34, 2 -; GFX7-NEXT: v_readlane_b32 s31, v34, 1 -; GFX7-NEXT: v_readlane_b32 s30, v34, 0 -; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX7-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX7-NEXT: s_mov_b64 exec, s[4:5] -; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39] +; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41] +; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_v16f64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 -; GFX8-NEXT: v_writelane_b32 v34, s30, 0 -; GFX8-NEXT: v_writelane_b32 v34, s31, 1 -; GFX8-NEXT: v_writelane_b32 v34, s34, 2 -; GFX8-NEXT: v_writelane_b32 v34, s35, 3 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32] ; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32] @@ -2236,14 +2214,14 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32] +; GFX8-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32] ; GFX8-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31] +; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33] +; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33] ; GFX8-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33] ; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc @@ -2260,31 +2238,16 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] ; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] ; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31] -; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35] -; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35] -; GFX8-NEXT: v_readlane_b32 s35, v34, 3 -; GFX8-NEXT: v_readlane_b32 s34, v34, 2 -; GFX8-NEXT: v_readlane_b32 s31, v34, 1 -; GFX8-NEXT: v_readlane_b32 s30, v34, 0 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39] +; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_v16f64: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 -; GFX900-NEXT: v_writelane_b32 v34, s30, 0 -; GFX900-NEXT: v_writelane_b32 v34, s31, 1 -; GFX900-NEXT: v_writelane_b32 v34, s34, 2 -; GFX900-NEXT: v_writelane_b32 v34, s35, 3 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[31:32] ; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[31:32] @@ -2370,14 +2333,14 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[30:31], v[28:29], v[31:32] +; GFX900-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32] ; GFX900-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32] ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[30:31] +; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[34:35], v[30:31], v[32:33] +; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33] ; GFX900-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33] ; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc @@ -2394,17 +2357,9 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] ; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] ; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[30:31] -; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[34:35] -; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[34:35] -; GFX900-NEXT: v_readlane_b32 s35, v34, 3 -; GFX900-NEXT: v_readlane_b32 s34, v34, 2 -; GFX900-NEXT: v_readlane_b32 s31, v34, 1 -; GFX900-NEXT: v_readlane_b32 s30, v34, 0 -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39] +; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41] +; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_v16f64: diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll index 989ef6f981d9d..55e1c3842aa6f 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll @@ -68,12 +68,12 @@ define amdgpu_kernel void @workgroup_ids_kernel() { define amdgpu_kernel void @caller() { ; GFX9-SDAG-LABEL: caller: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-SDAG-NEXT: s_mov_b32 s38, -1 -; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s11 -; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-SDAG-NEXT: s_mov_b32 s50, -1 +; GFX9-SDAG-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-SDAG-NEXT: s_add_u32 s48, s48, s11 +; GFX9-SDAG-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-SDAG-NEXT: s_mov_b32 s12, s8 ; GFX9-SDAG-NEXT: s_add_u32 s8, s4, 36 ; GFX9-SDAG-NEXT: s_addc_u32 s9, s5, 0 @@ -86,9 +86,9 @@ define amdgpu_kernel void @caller() { ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s12 ; GFX9-SDAG-NEXT: s_mov_b32 s32, 0 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -97,12 +97,12 @@ define amdgpu_kernel void @caller() { ; ; GFX9-GISEL-LABEL: caller: ; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-GISEL-NEXT: s_mov_b32 s38, -1 -; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s11 -; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-GISEL-NEXT: s_mov_b32 s50, -1 +; GFX9-GISEL-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-GISEL-NEXT: s_add_u32 s48, s48, s11 +; GFX9-GISEL-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-GISEL-NEXT: s_mov_b32 s14, s8 ; GFX9-GISEL-NEXT: s_add_u32 s8, s4, 36 ; GFX9-GISEL-NEXT: s_addc_u32 s9, s5, 0 @@ -115,10 +115,10 @@ define amdgpu_kernel void @caller() { ; GFX9-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s14 -; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], s[12:13] ; GFX9-GISEL-NEXT: s_mov_b32 s12, s14 ; GFX9-GISEL-NEXT: s_mov_b32 s32, 0 @@ -128,12 +128,12 @@ define amdgpu_kernel void @caller() { ; ; GFX9ARCH-SDAG-LABEL: caller: ; GFX9ARCH-SDAG: ; %bb.0: -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s38, -1 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9ARCH-SDAG-NEXT: s_add_u32 s36, s36, s8 -; GFX9ARCH-SDAG-NEXT: s_addc_u32 s37, s37, 0 +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s50, -1 +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9ARCH-SDAG-NEXT: s_add_u32 s48, s48, s8 +; GFX9ARCH-SDAG-NEXT: s_addc_u32 s49, s49, 0 ; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s4, 36 ; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s5, 0 ; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[4:5] @@ -145,9 +145,9 @@ define amdgpu_kernel void @caller() { ; GFX9ARCH-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9ARCH-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 ; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0 ; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -156,12 +156,12 @@ define amdgpu_kernel void @caller() { ; ; GFX9ARCH-GISEL-LABEL: caller: ; GFX9ARCH-GISEL: ; %bb.0: -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s38, -1 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9ARCH-GISEL-NEXT: s_add_u32 s36, s36, s8 -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s37, s37, 0 +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s50, -1 +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9ARCH-GISEL-NEXT: s_add_u32 s48, s48, s8 +; GFX9ARCH-GISEL-NEXT: s_addc_u32 s49, s49, 0 ; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s4, 36 ; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s5, 0 ; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[12:13], s[0:1] @@ -173,10 +173,10 @@ define amdgpu_kernel void @caller() { ; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9ARCH-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[4:5], s[12:13] ; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0 ; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll index acb706cee04d0..d29e6f8c3d2c6 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll @@ -43,28 +43,28 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_addc_u32 s13, s13, 0 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 -; CHECK-NEXT: s_load_dwordx8 s[48:55], s[8:9], 0x0 +; CHECK-NEXT: s_load_dwordx8 s[96:103], s[8:9], 0x0 ; CHECK-NEXT: s_add_u32 s0, s0, s17 ; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9] ; CHECK-NEXT: s_addc_u32 s1, s1, 0 ; CHECK-NEXT: v_mov_b32_e32 v40, v0 -; CHECK-NEXT: s_add_u32 s44, s34, 40 +; CHECK-NEXT: s_add_u32 s52, s34, 40 ; CHECK-NEXT: v_mov_b32_e32 v31, v0 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_mov_b32 s33, s16 -; CHECK-NEXT: s_addc_u32 s45, s35, 0 -; CHECK-NEXT: s_mov_b32 s43, s14 +; CHECK-NEXT: s_addc_u32 s53, s35, 0 +; CHECK-NEXT: s_mov_b32 s51, s14 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z13get_global_idj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45] +; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53] ; CHECK-NEXT: s_mov_b32 s12, s14 ; CHECK-NEXT: s_mov_b32 s13, s15 ; CHECK-NEXT: s_mov_b32 s14, s33 -; CHECK-NEXT: s_mov_b32 s42, s15 +; CHECK-NEXT: s_mov_b32 s50, s15 ; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v45, 0 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mov_b32_e32 v43, v0 @@ -73,12 +73,12 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] -; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mov_b32_e32 v41, v0 @@ -87,12 +87,12 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] -; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: ds_write_b32 v45, v45 offset:15360 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -102,22 +102,22 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z3minjj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z3minjj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0 ; CHECK-NEXT: v_and_b32_e32 v1, 28, v1 -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] -; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: global_load_dword v0, v0, s[52:53] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: global_load_dword v0, v0, s[100:101] +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4 ; CHECK-NEXT: v_mov_b32_e32 v1, 12 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mov_b32_e32 v42, v0 -; CHECK-NEXT: s_mov_b32 s44, exec_lo +; CHECK-NEXT: s_mov_b32 s52, exec_lo ; CHECK-NEXT: v_cmpx_ne_u32_e32 0, v42 ; CHECK-NEXT: s_cbranch_execz .LBB0_25 ; CHECK-NEXT: ; %bb.1: ; %.preheader5 @@ -136,7 +136,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; CHECK-NEXT: v_add_nc_u32_e32 v45, -1, v42 -; CHECK-NEXT: s_mov_b32 s45, 0 +; CHECK-NEXT: s_mov_b32 s53, 0 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v45 ; CHECK-NEXT: s_and_b32 exec_lo, exec_lo, vcc_lo ; CHECK-NEXT: s_cbranch_execz .LBB0_25 @@ -144,46 +144,46 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v43, 10, v43 ; CHECK-NEXT: v_add_nc_u32_e32 v46, 0x3c05, v0 ; CHECK-NEXT: v_mov_b32_e32 v47, 0 -; CHECK-NEXT: s_mov_b32 s47, 0 +; CHECK-NEXT: s_mov_b32 s63, 0 ; CHECK-NEXT: .LBB0_5: ; =>This Loop Header: Depth=1 ; CHECK-NEXT: ; Child Loop BB0_8 Depth 2 ; CHECK-NEXT: ; Child Loop BB0_20 Depth 2 -; CHECK-NEXT: v_add_nc_u32_e32 v0, s47, v44 -; CHECK-NEXT: s_lshl_b32 s4, s47, 5 -; CHECK-NEXT: s_add_i32 s46, s47, 1 -; CHECK-NEXT: s_add_i32 s5, s47, 5 -; CHECK-NEXT: v_or3_b32 v57, s4, v43, s46 +; CHECK-NEXT: v_add_nc_u32_e32 v0, s63, v44 +; CHECK-NEXT: s_lshl_b32 s4, s63, 5 +; CHECK-NEXT: s_add_i32 s62, s63, 1 +; CHECK-NEXT: s_add_i32 s5, s63, 5 +; CHECK-NEXT: v_or3_b32 v57, s4, v43, s62 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: ds_read_u8 v56, v0 -; CHECK-NEXT: v_mov_b32_e32 v58, s46 -; CHECK-NEXT: s_mov_b32 s52, exec_lo +; CHECK-NEXT: v_mov_b32_e32 v58, s62 +; CHECK-NEXT: s_mov_b32 s64, exec_lo ; CHECK-NEXT: v_cmpx_lt_u32_e64 s5, v42 ; CHECK-NEXT: s_cbranch_execz .LBB0_17 ; CHECK-NEXT: ; %bb.6: ; %.preheader2 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: s_mov_b32 s53, 0 -; CHECK-NEXT: s_mov_b32 s56, 0 +; CHECK-NEXT: s_mov_b32 s65, 0 +; CHECK-NEXT: s_mov_b32 s66, 0 ; CHECK-NEXT: s_branch .LBB0_8 ; CHECK-NEXT: .LBB0_7: ; in Loop: Header=BB0_8 Depth=2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s57 -; CHECK-NEXT: s_add_i32 s56, s56, 4 -; CHECK-NEXT: s_add_i32 s4, s47, s56 -; CHECK-NEXT: v_add_nc_u32_e32 v0, s56, v57 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67 +; CHECK-NEXT: s_add_i32 s66, s66, 4 +; CHECK-NEXT: s_add_i32 s4, s63, s66 +; CHECK-NEXT: v_add_nc_u32_e32 v0, s66, v57 ; CHECK-NEXT: s_add_i32 s5, s4, 5 ; CHECK-NEXT: s_add_i32 s4, s4, 1 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s5, v42 ; CHECK-NEXT: v_mov_b32_e32 v58, s4 -; CHECK-NEXT: s_or_b32 s53, vcc_lo, s53 -; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s53 +; CHECK-NEXT: s_or_b32 s65, vcc_lo, s65 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s65 ; CHECK-NEXT: s_cbranch_execz .LBB0_16 ; CHECK-NEXT: .LBB0_8: ; Parent Loop BB0_5 Depth=1 ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 -; CHECK-NEXT: v_add_nc_u32_e32 v59, s56, v46 -; CHECK-NEXT: v_add_nc_u32_e32 v58, s56, v57 +; CHECK-NEXT: v_add_nc_u32_e32 v59, s66, v46 +; CHECK-NEXT: v_add_nc_u32_e32 v58, s66, v57 ; CHECK-NEXT: ds_read_u8 v0, v59 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD -; CHECK-NEXT: s_and_saveexec_b32 s57, s4 +; CHECK-NEXT: s_and_saveexec_b32 s67, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_10 ; CHECK-NEXT: ; %bb.9: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -193,22 +193,22 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v58 ; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_8 Depth=2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s57 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67 ; CHECK-NEXT: ds_read_u8 v0, v59 offset:1 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD -; CHECK-NEXT: s_and_saveexec_b32 s57, s4 +; CHECK-NEXT: s_and_saveexec_b32 s67, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_12 ; CHECK-NEXT: ; %bb.11: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -218,11 +218,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_add_nc_u32_e32 v60, 1, v58 ; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47 @@ -230,11 +230,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v60 ; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_8 Depth=2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s57 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67 ; CHECK-NEXT: ds_read_u8 v0, v59 offset:2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD -; CHECK-NEXT: s_and_saveexec_b32 s57, s4 +; CHECK-NEXT: s_and_saveexec_b32 s67, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_14 ; CHECK-NEXT: ; %bb.13: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -244,11 +244,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_add_nc_u32_e32 v60, 2, v58 ; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47 @@ -256,11 +256,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v60 ; CHECK-NEXT: .LBB0_14: ; in Loop: Header=BB0_8 Depth=2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s57 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67 ; CHECK-NEXT: ds_read_u8 v0, v59 offset:3 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD -; CHECK-NEXT: s_and_saveexec_b32 s57, s4 +; CHECK-NEXT: s_and_saveexec_b32 s67, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_7 ; CHECK-NEXT: ; %bb.15: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -270,11 +270,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_add_nc_u32_e32 v58, 3, v58 ; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47 @@ -284,27 +284,27 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_branch .LBB0_7 ; CHECK-NEXT: .LBB0_16: ; %Flow45 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s65 ; CHECK-NEXT: v_mov_b32_e32 v57, v0 ; CHECK-NEXT: .LBB0_17: ; %Flow46 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 -; CHECK-NEXT: s_mov_b32 s47, exec_lo +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s64 +; CHECK-NEXT: s_mov_b32 s63, exec_lo ; CHECK-NEXT: v_cmpx_lt_u32_e64 v58, v42 ; CHECK-NEXT: s_cbranch_execz .LBB0_23 ; CHECK-NEXT: ; %bb.18: ; %.preheader ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: s_mov_b32 s52, 0 +; CHECK-NEXT: s_mov_b32 s64, 0 ; CHECK-NEXT: s_inst_prefetch 0x1 ; CHECK-NEXT: s_branch .LBB0_20 ; CHECK-NEXT: .p2align 6 ; CHECK-NEXT: .LBB0_19: ; in Loop: Header=BB0_20 Depth=2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s65 ; CHECK-NEXT: v_add_nc_u32_e32 v58, 1, v58 ; CHECK-NEXT: v_add_nc_u32_e32 v57, 1, v57 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v58, v42 -; CHECK-NEXT: s_or_b32 s52, vcc_lo, s52 -; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s52 +; CHECK-NEXT: s_or_b32 s64, vcc_lo, s64 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s64 ; CHECK-NEXT: s_cbranch_execz .LBB0_22 ; CHECK-NEXT: .LBB0_20: ; Parent Loop BB0_5 Depth=1 ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 @@ -312,7 +312,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: ds_read_u8 v0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD -; CHECK-NEXT: s_and_saveexec_b32 s53, s4 +; CHECK-NEXT: s_and_saveexec_b32 s65, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_19 ; CHECK-NEXT: ; %bb.21: ; in Loop: Header=BB0_20 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -322,11 +322,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -336,22 +336,22 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: .LBB0_22: ; %Flow43 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_inst_prefetch 0x2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s64 ; CHECK-NEXT: .LBB0_23: ; %Flow44 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s47 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s63 ; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s46, v45 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s62, v45 ; CHECK-NEXT: v_cmp_lt_u32_e64 s4, 59, v47 ; CHECK-NEXT: v_add_nc_u32_e32 v46, 1, v46 -; CHECK-NEXT: s_mov_b32 s47, s46 +; CHECK-NEXT: s_mov_b32 s63, s62 ; CHECK-NEXT: s_or_b32 s4, vcc_lo, s4 ; CHECK-NEXT: s_and_b32 s4, exec_lo, s4 -; CHECK-NEXT: s_or_b32 s45, s4, s45 -; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s45 +; CHECK-NEXT: s_or_b32 s53, s4, s53 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s53 ; CHECK-NEXT: s_cbranch_execnz .LBB0_5 ; CHECK-NEXT: .LBB0_25: ; %Flow51 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s44 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 ; CHECK-NEXT: s_add_u32 s8, s34, 40 @@ -359,11 +359,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mov_b32_e32 v0, 0 @@ -373,10 +373,10 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_cmpx_gt_u32_e64 v47, v41 ; CHECK-NEXT: s_cbranch_execz .LBB0_33 ; CHECK-NEXT: ; %bb.26: -; CHECK-NEXT: s_mov_b32 s44, 0 +; CHECK-NEXT: s_mov_b32 s52, 0 ; CHECK-NEXT: s_branch .LBB0_28 ; CHECK-NEXT: .LBB0_27: ; in Loop: Header=BB0_28 Depth=1 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s45 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s53 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_add_u32 s8, s34, 40 @@ -384,21 +384,21 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z14get_local_sizej@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z14get_local_sizej@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_add_co_u32 v41, vcc_lo, v0, v41 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc_lo, v47, v41 -; CHECK-NEXT: s_or_b32 s44, vcc_lo, s44 -; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s44 +; CHECK-NEXT: s_or_b32 s52, vcc_lo, s52 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s52 ; CHECK-NEXT: s_cbranch_execz .LBB0_33 ; CHECK-NEXT: .LBB0_28: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v41 -; CHECK-NEXT: s_mov_b32 s45, exec_lo +; CHECK-NEXT: s_mov_b32 s53, exec_lo ; CHECK-NEXT: ds_read_b32 v0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_lshrrev_b32_e32 v63, 10, v0 @@ -407,8 +407,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x180, v63 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 5, v62 ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 5, v72 -; CHECK-NEXT: v_add_co_u32 v2, s4, s48, v1 -; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s49, 0, s4 +; CHECK-NEXT: v_add_co_u32 v2, s4, s96, v1 +; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s97, 0, s4 ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 ; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo ; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -443,10 +443,10 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_addPU3AS1Vjj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_addPU3AS1Vjj@rel32@hi+12 ; CHECK-NEXT: v_or3_b32 v2, v3, v2, v4 -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: v_or3_b32 v73, v2, v0, v1 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v73 @@ -454,11 +454,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffc, v0 ; CHECK-NEXT: v_lshlrev_b32_e64 v44, v1, 1 ; CHECK-NEXT: v_and_b32_e32 v74, 28, v1 -; CHECK-NEXT: v_add_co_u32 v42, s4, s54, v0 -; CHECK-NEXT: v_add_co_ci_u32_e64 v43, null, s55, 0, s4 +; CHECK-NEXT: v_add_co_u32 v42, s4, s102, v0 +; CHECK-NEXT: v_add_co_ci_u32_e64 v43, null, s103, 0, s4 ; CHECK-NEXT: v_mov_b32_e32 v2, v44 ; CHECK-NEXT: v_mov_b32_e32 v0, v42 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_mov_b32_e32 v1, v43 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_bfe_u32 v0, v0, v74, 4 @@ -469,7 +469,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: ; %bb.30: ; in Loop: Header=BB0_28 Depth=1 ; CHECK-NEXT: v_xor_b32_e32 v4, v60, v58 ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 16, v[56:57] -; CHECK-NEXT: v_mad_u64_u32 v[6:7], null, 0x180, v73, s[50:51] +; CHECK-NEXT: v_mad_u64_u32 v[6:7], null, 0x180, v73, s[98:99] ; CHECK-NEXT: v_lshlrev_b32_e32 v10, 5, v0 ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 16, v4 ; CHECK-NEXT: v_lshlrev_b32_e32 v8, 6, v72 @@ -503,11 +503,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_subPU3AS1Vjj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_subPU3AS1Vjj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: s_branch .LBB0_27 @@ -792,28 +792,28 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: s_addc_u32 s13, s13, 0 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 -; CHECK-NEXT: s_load_dwordx2 s[46:47], s[8:9], 0x10 +; CHECK-NEXT: s_load_dwordx2 s[62:63], s[8:9], 0x10 ; CHECK-NEXT: s_add_u32 s0, s0, s17 -; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9] +; CHECK-NEXT: s_mov_b64 s[46:47], s[8:9] ; CHECK-NEXT: s_addc_u32 s1, s1, 0 ; CHECK-NEXT: v_mov_b32_e32 v40, v0 -; CHECK-NEXT: s_add_u32 s44, s38, 40 +; CHECK-NEXT: s_add_u32 s52, s46, 40 ; CHECK-NEXT: v_mov_b32_e32 v31, v0 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_mov_b32 s33, s16 -; CHECK-NEXT: s_addc_u32 s45, s39, 0 -; CHECK-NEXT: s_mov_b32 s43, s14 +; CHECK-NEXT: s_addc_u32 s53, s47, 0 +; CHECK-NEXT: s_mov_b32 s51, s14 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z13get_global_idj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45] +; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53] ; CHECK-NEXT: s_mov_b32 s12, s14 ; CHECK-NEXT: s_mov_b32 s13, s15 ; CHECK-NEXT: s_mov_b32 s14, s33 -; CHECK-NEXT: s_mov_b32 s42, s15 +; CHECK-NEXT: s_mov_b32 s50, s15 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[6:7] -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v43, 0 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mov_b32_e32 v42, v0 @@ -822,12 +822,12 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37] -; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45] +; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mul_lo_u32 v46, v0, 14 @@ -836,12 +836,12 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37] -; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45] +; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: ds_write_b32 v43, v43 offset:15360 ; CHECK-NEXT: v_add_nc_u32_e32 v44, 0x3c04, v46 @@ -852,15 +852,15 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z3minjj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z3minjj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0 ; CHECK-NEXT: v_and_b32_e32 v1, 28, v1 ; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37] -; CHECK-NEXT: s_mov_b64 s[8:9], s[44:45] +; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: global_load_dword v0, v0, s[46:47] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: global_load_dword v0, v0, s[62:63] +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_bfe_u32 v0, v0, v1, 4 @@ -868,7 +868,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mov_b32_e32 v41, v0 ; CHECK-NEXT: v_lshlrev_b32_e32 v42, 10, v42 -; CHECK-NEXT: s_mov_b32 s44, 0 +; CHECK-NEXT: s_mov_b32 s52, 0 ; CHECK-NEXT: s_mov_b32 s4, 0 ; CHECK-NEXT: ds_write_b8 v46, v43 offset:15364 ; CHECK-NEXT: v_add_nc_u32_e32 v45, -1, v41 @@ -878,12 +878,12 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: ; Child Loop BB1_8 Depth 2 ; CHECK-NEXT: v_add_nc_u32_e32 v0, s4, v44 ; CHECK-NEXT: s_lshl_b32 s5, s4, 5 -; CHECK-NEXT: s_add_i32 s45, s4, 1 +; CHECK-NEXT: s_add_i32 s53, s4, 1 ; CHECK-NEXT: s_add_i32 s6, s4, 5 -; CHECK-NEXT: v_or3_b32 v47, s5, v42, s45 +; CHECK-NEXT: v_or3_b32 v47, s5, v42, s53 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: ds_read_u8 v46, v0 -; CHECK-NEXT: v_mov_b32_e32 v56, s45 +; CHECK-NEXT: v_mov_b32_e32 v56, s53 ; CHECK-NEXT: s_mov_b32 s5, exec_lo ; CHECK-NEXT: v_cmpx_lt_u32_e64 s6, v41 ; CHECK-NEXT: s_cbranch_execz .LBB1_5 @@ -912,23 +912,23 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: .LBB1_5: ; %Flow4 ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; CHECK-NEXT: s_mov_b32 s46, exec_lo +; CHECK-NEXT: s_mov_b32 s62, exec_lo ; CHECK-NEXT: v_cmpx_lt_u32_e64 v56, v41 ; CHECK-NEXT: s_cbranch_execz .LBB1_11 ; CHECK-NEXT: ; %bb.6: ; %.103.preheader ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: s_mov_b32 s47, 0 +; CHECK-NEXT: s_mov_b32 s63, 0 ; CHECK-NEXT: s_inst_prefetch 0x1 ; CHECK-NEXT: s_branch .LBB1_8 ; CHECK-NEXT: .p2align 6 ; CHECK-NEXT: .LBB1_7: ; %.114 ; CHECK-NEXT: ; in Loop: Header=BB1_8 Depth=2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s48 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s64 ; CHECK-NEXT: v_add_nc_u32_e32 v56, 1, v56 ; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v56, v41 -; CHECK-NEXT: s_or_b32 s47, vcc_lo, s47 -; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s47 +; CHECK-NEXT: s_or_b32 s63, vcc_lo, s63 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s63 ; CHECK-NEXT: s_cbranch_execz .LBB1_10 ; CHECK-NEXT: .LBB1_8: ; %.103 ; CHECK-NEXT: ; Parent Loop BB1_1 Depth=1 @@ -937,22 +937,22 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: ds_read_u8 v0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v46, v0 src0_sel:BYTE_0 src1_sel:DWORD -; CHECK-NEXT: s_and_saveexec_b32 s48, s4 +; CHECK-NEXT: s_and_saveexec_b32 s64, s4 ; CHECK-NEXT: s_cbranch_execz .LBB1_7 ; CHECK-NEXT: ; %bb.9: ; %.110 ; CHECK-NEXT: ; in Loop: Header=BB1_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00 -; CHECK-NEXT: s_add_u32 s8, s38, 40 -; CHECK-NEXT: s_addc_u32 s9, s39, 0 +; CHECK-NEXT: s_add_u32 s8, s46, 40 +; CHECK-NEXT: s_addc_u32 s9, s47, 0 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: v_add_nc_u32_e32 v43, 1, v43 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -962,34 +962,34 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: .LBB1_10: ; %Flow ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: s_inst_prefetch 0x2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s47 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s63 ; CHECK-NEXT: .LBB1_11: ; %Flow2 ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s46 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s62 ; CHECK-NEXT: ; %bb.12: ; %.32 ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s45, v45 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s53, v45 ; CHECK-NEXT: v_cmp_lt_u32_e64 s4, 59, v43 ; CHECK-NEXT: s_or_b32 s4, vcc_lo, s4 ; CHECK-NEXT: s_and_b32 s4, exec_lo, s4 -; CHECK-NEXT: s_or_b32 s44, s4, s44 -; CHECK-NEXT: s_mov_b32 s4, s45 -; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s44 +; CHECK-NEXT: s_or_b32 s52, s4, s52 +; CHECK-NEXT: s_mov_b32 s4, s53 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s52 ; CHECK-NEXT: s_cbranch_execnz .LBB1_1 ; CHECK-NEXT: ; %bb.13: ; %.119 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s44 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 -; CHECK-NEXT: s_add_u32 s8, s38, 40 -; CHECK-NEXT: s_addc_u32 s9, s39, 0 +; CHECK-NEXT: s_add_u32 s8, s46, 40 +; CHECK-NEXT: s_addc_u32 s9, s47, 0 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s43 -; CHECK-NEXT: s_mov_b32 s13, s42 +; CHECK-NEXT: s_mov_b32 s12, s51 +; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll index b4682dfb8a26d..4ca00f2daf97a 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll @@ -12,13 +12,7 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 -; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 -; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: ;;#ASMSTART @@ -30,24 +24,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s59, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 -; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 -; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 -; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: ;;#ASMSTART @@ -59,23 +41,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s59, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 -; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 -; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 -; GFX11-NEXT: v_writelane_b32 v1, s59, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: s_addc_u32 s0, s32, 0x4040 @@ -89,12 +61,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s59, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s59, v1, 0 -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 -; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: @@ -104,13 +70,7 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo -; GFX12-NEXT: v_writelane_b32 v1, s59, 0 ; GFX12-NEXT: s_add_co_ci_u32 s0, s32, 0x4000 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_wait_alu 0xfffe @@ -124,50 +84,30 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s59, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_readlane_b32 s59, v1, 0 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 -; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 -; GFX8-NEXT: v_writelane_b32 v1, s59, 0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: s_movk_i32 s59, 0x4040 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 -; GFX8-NEXT: v_readfirstlane_b32 s59, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: v_readfirstlane_b32 s59, v0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s59, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s59, v1, 0 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 -; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 -; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART @@ -175,47 +115,29 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 -; GFX900-NEXT: v_writelane_b32 v1, s59, 0 -; GFX900-NEXT: v_readfirstlane_b32 s59, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: v_readfirstlane_b32 s59, v0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s59, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s59, v1, 0 -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 -; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 -; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill -; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s32, 0x4040 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: v_writelane_b32 v1, s59, 0 -; GFX942-NEXT: s_mov_b32 s59, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s59, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s59, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s59, v1, 0 -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 -; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload -; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca i32, align 4, addrspace(5) @@ -230,12 +152,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 { ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_dead_scc: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 -; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5 ; GFX10_1-NEXT: s_addk_i32 s59, 0x4040 @@ -246,23 +162,11 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 { ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s59 ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 -; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_dead_scc: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 -; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5 ; GFX10_3-NEXT: s_addk_i32 s59, 0x4040 @@ -273,22 +177,11 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 { ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s59 ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 -; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_dead_scc: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 -; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v1, s59, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v0, s0 @@ -300,12 +193,6 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 { ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s59 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s59, v1, 0 -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 -; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_dead_scc: @@ -315,105 +202,62 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v1, s59, 0 ; GFX12-NEXT: s_add_co_i32 s0, s32, 0x4000 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 s59, s0 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 s59, s0 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s59 ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_readlane_b32 s59, v1, 0 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_dead_scc: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 -; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v1, s59, 0 -; GFX8-NEXT: s_lshr_b32 s59, s32, 6 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX8-NEXT: s_addk_i32 s59, 0x4040 +; GFX8-NEXT: s_lshr_b32 s59, s32, 6 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: s_addk_i32 s59, 0x4040 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s59 ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s59, v1, 0 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 -; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_dead_scc: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 -; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_writelane_b32 v1, s59, 0 -; GFX900-NEXT: s_lshr_b32 s59, s32, 6 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX900-NEXT: s_addk_i32 s59, 0x4040 +; GFX900-NEXT: s_lshr_b32 s59, s32, 6 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: s_addk_i32 s59, 0x4040 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s59 ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s59, v1, 0 -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 -; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_dead_scc: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 -; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill -; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_add_i32 s0, s32, 0x4040 -; GFX942-NEXT: v_writelane_b32 v1, s59, 0 -; GFX942-NEXT: s_mov_b32 s59, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s59, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s59 ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s59, v1, 0 -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 -; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload -; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca i32, align 4, addrspace(5) @@ -428,14 +272,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_1-NEXT: s_mov_b32 s5, s33 ; GFX10_1-NEXT: s_mov_b32 s33, s32 -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880 -; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 ; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: s_mov_b32 s32, s33 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 @@ -443,19 +281,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10_1-NEXT: s_mov_b32 s33, s5 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 ; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s59, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880 -; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: s_mov_b32 s33, s5 -; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: @@ -463,13 +294,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_3-NEXT: s_mov_b32 s5, s33 ; GFX10_3-NEXT: s_mov_b32 s33, s32 -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880 -; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 ; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: s_mov_b32 s32, s33 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 @@ -477,18 +303,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10_3-NEXT: s_mov_b32 s33, s5 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 ; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s59, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880 -; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: s_mov_b32 s33, s5 -; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: @@ -496,13 +316,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s1, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s2, s33, 0x4044 -; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_addk_i32 s32, 0x4080 ; GFX11-NEXT: s_add_i32 s0, s33, 64 -; GFX11-NEXT: v_writelane_b32 v1, s59, 0 +; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: s_addc_u32 s0, s33, 0x4040 @@ -511,18 +327,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 ; GFX11-NEXT: s_bitset0_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: s_mov_b32 s33, s1 ; GFX11-NEXT: s_mov_b32 s59, s0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s59, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s59, v1, 0 -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s2, s33, 0x4044 -; GFX11-NEXT: scratch_load_b32 v1, off, s2 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_mov_b32 s33, s1 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: @@ -534,13 +343,9 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_mov_b32 s1, s33 ; GFX12-NEXT: s_mov_b32 s33, s32 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_store_b32 off, v1, s33 offset:16388 ; 4-byte Folded Spill -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo -; GFX12-NEXT: v_writelane_b32 v1, s59, 0 +; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_add_co_ci_u32 s0, s33, 0x4000 ; GFX12-NEXT: v_mov_b32_e32 v0, s33 ; GFX12-NEXT: s_wait_alu 0xfffe @@ -554,14 +359,8 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s59, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_readlane_b32 s59, v1, 0 ; GFX12-NEXT: s_mov_b32 s32, s33 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_load_b32 v1, off, s33 offset:16388 ; 4-byte Folded Reload -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_mov_b32 s33, s1 -; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_setpc_b64 s[30:31] ; @@ -570,33 +369,22 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 s6, s33 ; GFX8-NEXT: s_mov_b32 s33, s32 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s7, s33, 0x101100 -; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 -; GFX8-NEXT: v_writelane_b32 v1, s59, 0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX8-NEXT: s_movk_i32 s59, 0x4040 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 ; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 -; GFX8-NEXT: v_readfirstlane_b32 s59, v0 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: v_readfirstlane_b32 s59, v0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s59, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s59, v1, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s7, s33, 0x101100 -; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_mov_b32 s33, s6 -; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: @@ -604,32 +392,21 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s6, s33 ; GFX900-NEXT: s_mov_b32 s33, s32 -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s7, s33, 0x101100 -; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 ; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 -; GFX900-NEXT: v_writelane_b32 v1, s59, 0 -; GFX900-NEXT: v_readfirstlane_b32 s59, v0 +; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: v_readfirstlane_b32 s59, v0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s59, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s59, v1, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s7, s33, 0x101100 -; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_mov_b32 s33, s6 -; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: @@ -637,10 +414,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_mov_b32 s2, s33 ; GFX942-NEXT: s_mov_b32 s33, s32 -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s3, s33, 0x4044 -; GFX942-NEXT: scratch_store_dword off, v1, s3 ; 4-byte Folded Spill -; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_addk_i32 s32, 0x4080 ; GFX942-NEXT: s_add_i32 s0, s33, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 @@ -648,22 +421,15 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX942-NEXT: s_addc_u32 s0, s33, 0x4040 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: v_writelane_b32 v1, s59, 0 -; GFX942-NEXT: s_mov_b32 s59, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: s_mov_b32 s59, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s59, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s59, v1, 0 ; GFX942-NEXT: s_mov_b32 s32, s33 -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s3, s33, 0x4044 -; GFX942-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload -; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_mov_b32 s33, s2 -; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca i32, align 4, addrspace(5) @@ -676,75 +442,39 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 -; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32 -; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo -; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 -; GFX10_1-NEXT: v_readfirstlane_b32 s59, v1 +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s59, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0 -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 -; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 -; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32 -; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo -; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 -; GFX10_3-NEXT: v_readfirstlane_b32 s59, v1 +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s59, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0 -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 -; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 -; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo -; GFX11-NEXT: v_writelane_b32 v0, s59, 0 ; GFX11-NEXT: s_addc_u32 s0, s32, 64 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 ; GFX11-NEXT: s_bitset0_b32 s0, 0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s59, s0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s59, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s59, v0, 0 -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 -; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: @@ -754,94 +484,50 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v0, s59, 0 -; GFX12-NEXT: s_mov_b32 s59, s32 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo +; GFX12-NEXT: s_mov_b32 s59, s32 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s59, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_readlane_b32 s59, v0, 0 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 -; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v0, s59, 0 -; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s32 +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: s_mov_b32 s59, 64 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, s59, v1 -; GFX8-NEXT: v_readfirstlane_b32 s59, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: v_readfirstlane_b32 s59, v0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s59, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s59, v0, 0 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 -; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 -; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s32 -; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 -; GFX900-NEXT: v_writelane_b32 v0, s59, 0 -; GFX900-NEXT: v_readfirstlane_b32 s59, v1 +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: v_readfirstlane_b32 s59, v0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s59, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s59, v0, 0 -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 -; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 -; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill -; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s32, 64 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: v_writelane_b32 v0, s59, 0 ; GFX942-NEXT: s_mov_b32 s59, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s59, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s59, v0, 0 -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 -; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload -; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0) @@ -852,67 +538,32 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0 ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 -; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0 ; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5 ; GFX10_1-NEXT: s_add_i32 s59, s59, 64 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s59 ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0 -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 -; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 -; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0 ; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5 ; GFX10_3-NEXT: s_add_i32 s59, s59, 64 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s59 ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0 -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 -; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 -; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v0, s59, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 s59, s0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s59 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s59, v0, 0 -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 -; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: @@ -922,85 +573,41 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v0, s59, 0 ; GFX12-NEXT: s_mov_b32 s59, s32 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s59 ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_readlane_b32 s59, v0, 0 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 -; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v0, s59, 0 ; GFX8-NEXT: s_lshr_b32 s59, s32, 6 ; GFX8-NEXT: s_add_i32 s59, s59, 64 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s59 ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s59, v0, 0 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 -; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 -; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_writelane_b32 v0, s59, 0 ; GFX900-NEXT: s_lshr_b32 s59, s32, 6 ; GFX900-NEXT: s_add_i32 s59, s59, 64 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s59 ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s59, v0, 0 -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 -; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 -; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill -; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_add_i32 s0, s32, 64 -; GFX942-NEXT: v_writelane_b32 v0, s59, 0 ; GFX942-NEXT: s_mov_b32 s59, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s59 ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s59, v0, 0 -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 -; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload -; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0) @@ -1013,29 +620,16 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_1-NEXT: s_mov_b32 s5, s33 ; GFX10_1-NEXT: s_mov_b32 s33, s32 -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 -; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s33 -; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0 -; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: s_add_i32 s32, s32, 0x80800 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: s_mov_b32 s32, s33 -; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 -; GFX10_1-NEXT: v_readfirstlane_b32 s59, v1 +; GFX10_1-NEXT: s_mov_b32 s33, s5 +; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s59, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0 -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 -; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: s_mov_b32 s33, s5 -; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: @@ -1043,27 +637,16 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_3-NEXT: s_mov_b32 s5, s33 ; GFX10_3-NEXT: s_mov_b32 s33, s32 -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 -; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s33 -; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0 -; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: s_add_i32 s32, s32, 0x80800 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: s_mov_b32 s32, s33 -; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 -; GFX10_3-NEXT: v_readfirstlane_b32 s59, v1 +; GFX10_3-NEXT: s_mov_b32 s33, s5 +; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s59, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0 -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 -; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: s_mov_b32 s33, s5 -; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: @@ -1071,29 +654,17 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s1, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 -; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_addk_i32 s32, 0x4080 +; GFX11-NEXT: s_addk_i32 s32, 0x4040 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo -; GFX11-NEXT: v_writelane_b32 v0, s59, 0 ; GFX11-NEXT: s_addc_u32 s0, s33, 64 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 ; GFX11-NEXT: s_bitset0_b32 s0, 0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s33, s1 ; GFX11-NEXT: s_mov_b32 s59, s0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s59, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s59, v0, 0 -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 -; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_mov_b32 s33, s1 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: @@ -1105,25 +676,15 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_mov_b32 s1, s33 ; GFX12-NEXT: s_mov_b32 s33, s32 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v0, s59, 0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 -; GFX12-NEXT: s_mov_b32 s59, s33 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 s59, s33 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s59, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_readlane_b32 s59, v0, 0 ; GFX12-NEXT: s_mov_b32 s32, s33 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_mov_b32 s33, s1 -; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_setpc_b64 s[30:31] ; @@ -1132,28 +693,17 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 s6, s33 ; GFX8-NEXT: s_mov_b32 s33, s32 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 -; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v0, s59, 0 -; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s33 +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX8-NEXT: s_mov_b32 s59, 64 -; GFX8-NEXT: v_add_u32_e32 v1, vcc, s59, v1 -; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 -; GFX8-NEXT: v_readfirstlane_b32 s59, v1 +; GFX8-NEXT: s_add_i32 s32, s32, 0x101000 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: v_readfirstlane_b32 s59, v0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s59, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s59, v0, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 -; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_mov_b32 s33, s6 -; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: @@ -1161,27 +711,16 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s6, s33 ; GFX900-NEXT: s_mov_b32 s33, s32 -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 -; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s33 -; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 -; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 -; GFX900-NEXT: v_writelane_b32 v0, s59, 0 -; GFX900-NEXT: v_readfirstlane_b32 s59, v1 +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 +; GFX900-NEXT: s_add_i32 s32, s32, 0x101000 +; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: v_readfirstlane_b32 s59, v0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s59, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s59, v0, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 -; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_mov_b32 s33, s6 -; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: @@ -1189,28 +728,17 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_mov_b32 s2, s33 ; GFX942-NEXT: s_mov_b32 s33, s32 -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s3, s33, 0x4040 -; GFX942-NEXT: scratch_store_dword off, v0, s3 ; 4-byte Folded Spill -; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: s_addk_i32 s32, 0x4080 +; GFX942-NEXT: s_addk_i32 s32, 0x4040 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s33, 64 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: v_writelane_b32 v0, s59, 0 ; GFX942-NEXT: s_mov_b32 s59, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s59, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s59, v0, 0 ; GFX942-NEXT: s_mov_b32 s32, s33 -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s3, s33, 0x4040 -; GFX942-NEXT: scratch_load_dword v0, off, s3 ; 4-byte Folded Reload -; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_mov_b32 s33, s2 -; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0) @@ -1223,27 +751,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_1-NEXT: s_mov_b32 s4, s33 ; GFX10_1-NEXT: s_mov_b32 s33, s32 -; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1 -; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 -; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 -; GFX10_1-NEXT: v_writelane_b32 v0, s59, 0 -; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: s_add_i32 s32, s32, 0x80800 ; GFX10_1-NEXT: s_lshr_b32 s59, s33, 5 ; GFX10_1-NEXT: s_mov_b32 s32, s33 ; GFX10_1-NEXT: s_add_i32 s59, s59, 64 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s59 ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s59, v0, 0 -; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1 -; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 -; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 ; GFX10_1-NEXT: s_mov_b32 s33, s4 -; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: @@ -1251,25 +766,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_3-NEXT: s_mov_b32 s4, s33 ; GFX10_3-NEXT: s_mov_b32 s33, s32 -; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1 -; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 -; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 -; GFX10_3-NEXT: v_writelane_b32 v0, s59, 0 -; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: s_add_i32 s32, s32, 0x80800 ; GFX10_3-NEXT: s_lshr_b32 s59, s33, 5 ; GFX10_3-NEXT: s_mov_b32 s32, s33 ; GFX10_3-NEXT: s_add_i32 s59, s59, 64 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s59 ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s59, v0, 0 -; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1 -; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 -; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 ; GFX10_3-NEXT: s_mov_b32 s33, s4 -; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: @@ -1277,25 +781,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 -; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 -; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 -; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s1 -; GFX11-NEXT: v_writelane_b32 v0, s59, 0 -; GFX11-NEXT: s_addk_i32 s32, 0x4080 +; GFX11-NEXT: s_addk_i32 s32, 0x4040 ; GFX11-NEXT: s_add_i32 s1, s33, 64 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_mov_b32 s59, s1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s59 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s59, v0, 0 -; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 -; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 -; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_mov_b32 s33, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: @@ -1307,24 +800,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_mov_b32 s0, s33 ; GFX12-NEXT: s_mov_b32 s33, s32 -; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 -; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s1 -; GFX12-NEXT: v_writelane_b32 v0, s59, 0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 +; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 s59, s33 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s59 ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_mov_b32 s32, s33 -; GFX12-NEXT: v_readlane_b32 s59, v0, 0 -; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 -; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s1 ; GFX12-NEXT: s_mov_b32 s33, s0 -; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_setpc_b64 s[30:31] ; @@ -1333,25 +816,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 s4, s33 ; GFX8-NEXT: s_mov_b32 s33, s32 -; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1 -; GFX8-NEXT: s_add_i32 s5, s33, 0x101000 -; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[6:7] -; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 -; GFX8-NEXT: v_writelane_b32 v0, s59, 0 +; GFX8-NEXT: s_add_i32 s32, s32, 0x101000 ; GFX8-NEXT: s_lshr_b32 s59, s33, 6 ; GFX8-NEXT: s_add_i32 s59, s59, 64 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s59 ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s59, v0, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 -; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1 -; GFX8-NEXT: s_add_i32 s5, s33, 0x101000 -; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[6:7] ; GFX8-NEXT: s_mov_b32 s33, s4 -; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: @@ -1359,25 +831,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, s33 ; GFX900-NEXT: s_mov_b32 s33, s32 -; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1 -; GFX900-NEXT: s_add_i32 s5, s33, 0x101000 -; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[6:7] -; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 -; GFX900-NEXT: v_writelane_b32 v0, s59, 0 +; GFX900-NEXT: s_add_i32 s32, s32, 0x101000 ; GFX900-NEXT: s_lshr_b32 s59, s33, 6 ; GFX900-NEXT: s_add_i32 s59, s59, 64 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s59 ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s59, v0, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 -; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1 -; GFX900-NEXT: s_add_i32 s5, s33, 0x101000 -; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[6:7] ; GFX900-NEXT: s_mov_b32 s33, s4 -; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: @@ -1385,25 +846,14 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_mov_b32 s0, s33 ; GFX942-NEXT: s_mov_b32 s33, s32 -; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 -; GFX942-NEXT: s_add_i32 s1, s33, 0x4040 -; GFX942-NEXT: scratch_store_dword off, v0, s1 ; 4-byte Folded Spill -; GFX942-NEXT: s_mov_b64 exec, s[2:3] -; GFX942-NEXT: s_addk_i32 s32, 0x4080 +; GFX942-NEXT: s_addk_i32 s32, 0x4040 ; GFX942-NEXT: s_add_i32 s1, s33, 64 -; GFX942-NEXT: v_writelane_b32 v0, s59, 0 ; GFX942-NEXT: s_mov_b32 s59, s1 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s59 ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s59, v0, 0 ; GFX942-NEXT: s_mov_b32 s32, s33 -; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 -; GFX942-NEXT: s_add_i32 s1, s33, 0x4040 -; GFX942-NEXT: scratch_load_dword v0, off, s1 ; 4-byte Folded Reload -; GFX942-NEXT: s_mov_b64 exec, s[2:3] ; GFX942-NEXT: s_mov_b32 s33, s0 -; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0) @@ -1414,12 +864,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 -; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5 ; GFX10_1-NEXT: s_add_i32 s59, s4, 0x442c @@ -1431,23 +875,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s59, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 -; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 -; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5 ; GFX10_3-NEXT: s_add_i32 s59, s4, 0x442c @@ -1459,22 +891,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s59, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 -; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 -; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: v_writelane_b32 v1, s59, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 ; GFX11-NEXT: s_add_i32 s59, s32, 0x442c ; GFX11-NEXT: v_mov_b32_e32 v0, s0 @@ -1485,12 +906,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s59, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s59, v1, 0 -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 -; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: @@ -1500,11 +915,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: v_writelane_b32 v1, s59, 0 ; GFX12-NEXT: s_add_co_i32 s59, s32, 0x43ec ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo @@ -1514,23 +924,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s59, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_readlane_b32 s59, v1, 0 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 -; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_lshr_b32 s4, s32, 6 -; GFX8-NEXT: v_writelane_b32 v1, s59, 0 ; GFX8-NEXT: s_add_i32 s59, s4, 0x442c ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 @@ -1541,23 +941,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s59, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s59, v1, 0 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 -; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 -; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_lshr_b32 s4, s32, 6 -; GFX900-NEXT: v_writelane_b32 v1, s59, 0 ; GFX900-NEXT: s_add_i32 s59, s4, 0x442c ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 @@ -1568,22 +957,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s59, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s59, v1, 0 -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 -; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 -; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill -; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: v_writelane_b32 v1, s59, 0 ; GFX942-NEXT: s_add_i32 s59, s32, 0x442c ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 @@ -1594,12 +972,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s59, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s59, v1, 0 -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 -; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload -; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca [4096 x i32], align 4, addrspace(5) @@ -1613,12 +985,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 -; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: v_writelane_b32 v1, s59, 0 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_lshl_b32 s4, s16, 2 ; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5 @@ -1632,23 +998,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s59, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s59, v1, 0 -; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 -; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 -; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: v_writelane_b32 v1, s59, 0 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_lshl_b32 s4, s16, 2 ; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5 @@ -1662,23 +1016,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s59, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s59, v1, 0 -; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 -; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 -; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload -; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 -; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 -; GFX11-NEXT: s_add_i32 s2, s32, 0x8040 -; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill -; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s1, s32, 64 -; GFX11-NEXT: v_writelane_b32 v1, s59, 0 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-NEXT: s_add_i32 s59, s32, s0 @@ -1690,12 +1033,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s59, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s59, v1, 0 -; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 -; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload -; GFX11-NEXT: s_mov_b32 exec_lo, s0 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: @@ -1705,11 +1042,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 -; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s1 -; GFX12-NEXT: v_writelane_b32 v1, s59, 0 ; GFX12-NEXT: s_lshl_b32 s0, s0, 2 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_wait_alu 0xfffe @@ -1723,22 +1055,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s59, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_readlane_b32 s59, v1, 0 -; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 -; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 exec_lo, s0 -; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 -; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v1, s59, 0 ; GFX8-NEXT: s_lshl_b32 s4, s16, 2 ; GFX8-NEXT: s_lshr_b32 s59, s32, 6 ; GFX8-NEXT: s_add_i32 s59, s59, s4 @@ -1752,22 +1074,11 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s59, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s59, v1, 0 -; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 -; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 -; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_writelane_b32 v1, s59, 0 ; GFX900-NEXT: s_lshl_b32 s4, s16, 2 ; GFX900-NEXT: s_lshr_b32 s59, s32, 6 ; GFX900-NEXT: s_add_i32 s59, s59, s4 @@ -1781,23 +1092,12 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s59, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s59, v1, 0 -; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 -; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload -; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 -; GFX942-NEXT: s_add_i32 s1, s32, 0x8040 -; GFX942-NEXT: scratch_store_dword off, v1, s1 ; 4-byte Folded Spill -; GFX942-NEXT: s_mov_b64 exec, s[2:3] ; GFX942-NEXT: s_lshl_b32 s0, s0, 2 -; GFX942-NEXT: v_writelane_b32 v1, s59, 0 ; GFX942-NEXT: s_add_i32 s59, s32, s0 ; GFX942-NEXT: s_addk_i32 s59, 0x4040 ; GFX942-NEXT: s_add_i32 s0, s32, 64 @@ -1809,12 +1109,6 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s59, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s59, v1, 0 -; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 -; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 -; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload -; GFX942-NEXT: s_mov_b64 exec, s[0:1] -; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca [4096 x i32], align 4, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll index 2420393b63ba9..23b7369a11dd3 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll @@ -44,30 +44,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX7-NEXT: v_writelane_b32 v23, s35, 4 ; GFX7-NEXT: v_writelane_b32 v23, s36, 5 ; GFX7-NEXT: v_writelane_b32 v23, s37, 6 -; GFX7-NEXT: v_writelane_b32 v23, s38, 7 -; GFX7-NEXT: v_writelane_b32 v23, s39, 8 -; GFX7-NEXT: v_writelane_b32 v23, s40, 9 -; GFX7-NEXT: v_writelane_b32 v23, s41, 10 -; GFX7-NEXT: v_writelane_b32 v23, s42, 11 -; GFX7-NEXT: v_writelane_b32 v23, s43, 12 -; GFX7-NEXT: v_writelane_b32 v23, s44, 13 -; GFX7-NEXT: v_writelane_b32 v23, s45, 14 -; GFX7-NEXT: v_writelane_b32 v23, s46, 15 -; GFX7-NEXT: v_writelane_b32 v23, s47, 16 -; GFX7-NEXT: v_writelane_b32 v23, s48, 17 -; GFX7-NEXT: v_writelane_b32 v23, s49, 18 -; GFX7-NEXT: v_writelane_b32 v23, s50, 19 -; GFX7-NEXT: v_writelane_b32 v23, s51, 20 -; GFX7-NEXT: v_writelane_b32 v23, s52, 21 -; GFX7-NEXT: v_writelane_b32 v23, s53, 22 -; GFX7-NEXT: v_writelane_b32 v23, s54, 23 -; GFX7-NEXT: v_writelane_b32 v23, s55, 24 -; GFX7-NEXT: v_writelane_b32 v23, s56, 25 +; GFX7-NEXT: v_writelane_b32 v23, s46, 7 +; GFX7-NEXT: v_writelane_b32 v23, s47, 8 +; GFX7-NEXT: v_writelane_b32 v23, s48, 9 +; GFX7-NEXT: v_writelane_b32 v23, s49, 10 +; GFX7-NEXT: v_writelane_b32 v23, s50, 11 +; GFX7-NEXT: v_writelane_b32 v23, s51, 12 ; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 -; GFX7-NEXT: v_writelane_b32 v23, s57, 26 +; GFX7-NEXT: v_writelane_b32 v23, s52, 13 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec -; GFX7-NEXT: v_writelane_b32 v23, s58, 27 +; GFX7-NEXT: v_writelane_b32 v23, s53, 14 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use alloca0 v0 ; GFX7-NEXT: ;;#ASMEND @@ -78,35 +65,20 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX7-NEXT: v_mov_b32_e32 v0, 0x4040 ; GFX7-NEXT: v_mad_u32_u24 v0, v0, 64, s32 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 6, v0 -; GFX7-NEXT: v_writelane_b32 v23, s59, 28 ; GFX7-NEXT: v_readfirstlane_b32 s59, v0 ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX7-NEXT: ;;#ASMEND -; GFX7-NEXT: v_readlane_b32 s59, v23, 28 -; GFX7-NEXT: v_readlane_b32 s58, v23, 27 -; GFX7-NEXT: v_readlane_b32 s57, v23, 26 -; GFX7-NEXT: v_readlane_b32 s56, v23, 25 -; GFX7-NEXT: v_readlane_b32 s55, v23, 24 -; GFX7-NEXT: v_readlane_b32 s54, v23, 23 -; GFX7-NEXT: v_readlane_b32 s53, v23, 22 -; GFX7-NEXT: v_readlane_b32 s52, v23, 21 -; GFX7-NEXT: v_readlane_b32 s51, v23, 20 -; GFX7-NEXT: v_readlane_b32 s50, v23, 19 -; GFX7-NEXT: v_readlane_b32 s49, v23, 18 -; GFX7-NEXT: v_readlane_b32 s48, v23, 17 -; GFX7-NEXT: v_readlane_b32 s47, v23, 16 -; GFX7-NEXT: v_readlane_b32 s46, v23, 15 -; GFX7-NEXT: v_readlane_b32 s45, v23, 14 -; GFX7-NEXT: v_readlane_b32 s44, v23, 13 -; GFX7-NEXT: v_readlane_b32 s43, v23, 12 -; GFX7-NEXT: v_readlane_b32 s42, v23, 11 -; GFX7-NEXT: v_readlane_b32 s41, v23, 10 -; GFX7-NEXT: v_readlane_b32 s40, v23, 9 -; GFX7-NEXT: v_readlane_b32 s39, v23, 8 -; GFX7-NEXT: v_readlane_b32 s38, v23, 7 +; GFX7-NEXT: v_readlane_b32 s53, v23, 14 +; GFX7-NEXT: v_readlane_b32 s52, v23, 13 +; GFX7-NEXT: v_readlane_b32 s51, v23, 12 +; GFX7-NEXT: v_readlane_b32 s50, v23, 11 +; GFX7-NEXT: v_readlane_b32 s49, v23, 10 +; GFX7-NEXT: v_readlane_b32 s48, v23, 9 +; GFX7-NEXT: v_readlane_b32 s47, v23, 8 +; GFX7-NEXT: v_readlane_b32 s46, v23, 7 ; GFX7-NEXT: v_readlane_b32 s37, v23, 6 ; GFX7-NEXT: v_readlane_b32 s36, v23, 5 ; GFX7-NEXT: v_readlane_b32 s35, v23, 4 @@ -135,30 +107,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX8-NEXT: v_writelane_b32 v23, s35, 4 ; GFX8-NEXT: v_writelane_b32 v23, s36, 5 ; GFX8-NEXT: v_writelane_b32 v23, s37, 6 -; GFX8-NEXT: v_writelane_b32 v23, s38, 7 -; GFX8-NEXT: v_writelane_b32 v23, s39, 8 -; GFX8-NEXT: v_writelane_b32 v23, s40, 9 -; GFX8-NEXT: v_writelane_b32 v23, s41, 10 -; GFX8-NEXT: v_writelane_b32 v23, s42, 11 -; GFX8-NEXT: v_writelane_b32 v23, s43, 12 -; GFX8-NEXT: v_writelane_b32 v23, s44, 13 -; GFX8-NEXT: v_writelane_b32 v23, s45, 14 -; GFX8-NEXT: v_writelane_b32 v23, s46, 15 -; GFX8-NEXT: v_writelane_b32 v23, s47, 16 -; GFX8-NEXT: v_writelane_b32 v23, s48, 17 -; GFX8-NEXT: v_writelane_b32 v23, s49, 18 -; GFX8-NEXT: v_writelane_b32 v23, s50, 19 -; GFX8-NEXT: v_writelane_b32 v23, s51, 20 -; GFX8-NEXT: v_writelane_b32 v23, s52, 21 -; GFX8-NEXT: v_writelane_b32 v23, s53, 22 -; GFX8-NEXT: v_writelane_b32 v23, s54, 23 -; GFX8-NEXT: v_writelane_b32 v23, s55, 24 -; GFX8-NEXT: v_writelane_b32 v23, s56, 25 +; GFX8-NEXT: v_writelane_b32 v23, s46, 7 +; GFX8-NEXT: v_writelane_b32 v23, s47, 8 +; GFX8-NEXT: v_writelane_b32 v23, s48, 9 +; GFX8-NEXT: v_writelane_b32 v23, s49, 10 +; GFX8-NEXT: v_writelane_b32 v23, s50, 11 +; GFX8-NEXT: v_writelane_b32 v23, s51, 12 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX8-NEXT: v_writelane_b32 v23, s57, 26 +; GFX8-NEXT: v_writelane_b32 v23, s52, 13 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_writelane_b32 v23, s58, 27 +; GFX8-NEXT: v_writelane_b32 v23, s53, 14 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND @@ -169,35 +128,20 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX8-NEXT: v_mov_b32_e32 v0, 0x4040 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, 64, s32 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 6, v0 -; GFX8-NEXT: v_writelane_b32 v23, s59, 28 ; GFX8-NEXT: v_readfirstlane_b32 s59, v0 ; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s59, v23, 28 -; GFX8-NEXT: v_readlane_b32 s58, v23, 27 -; GFX8-NEXT: v_readlane_b32 s57, v23, 26 -; GFX8-NEXT: v_readlane_b32 s56, v23, 25 -; GFX8-NEXT: v_readlane_b32 s55, v23, 24 -; GFX8-NEXT: v_readlane_b32 s54, v23, 23 -; GFX8-NEXT: v_readlane_b32 s53, v23, 22 -; GFX8-NEXT: v_readlane_b32 s52, v23, 21 -; GFX8-NEXT: v_readlane_b32 s51, v23, 20 -; GFX8-NEXT: v_readlane_b32 s50, v23, 19 -; GFX8-NEXT: v_readlane_b32 s49, v23, 18 -; GFX8-NEXT: v_readlane_b32 s48, v23, 17 -; GFX8-NEXT: v_readlane_b32 s47, v23, 16 -; GFX8-NEXT: v_readlane_b32 s46, v23, 15 -; GFX8-NEXT: v_readlane_b32 s45, v23, 14 -; GFX8-NEXT: v_readlane_b32 s44, v23, 13 -; GFX8-NEXT: v_readlane_b32 s43, v23, 12 -; GFX8-NEXT: v_readlane_b32 s42, v23, 11 -; GFX8-NEXT: v_readlane_b32 s41, v23, 10 -; GFX8-NEXT: v_readlane_b32 s40, v23, 9 -; GFX8-NEXT: v_readlane_b32 s39, v23, 8 -; GFX8-NEXT: v_readlane_b32 s38, v23, 7 +; GFX8-NEXT: v_readlane_b32 s53, v23, 14 +; GFX8-NEXT: v_readlane_b32 s52, v23, 13 +; GFX8-NEXT: v_readlane_b32 s51, v23, 12 +; GFX8-NEXT: v_readlane_b32 s50, v23, 11 +; GFX8-NEXT: v_readlane_b32 s49, v23, 10 +; GFX8-NEXT: v_readlane_b32 s48, v23, 9 +; GFX8-NEXT: v_readlane_b32 s47, v23, 8 +; GFX8-NEXT: v_readlane_b32 s46, v23, 7 ; GFX8-NEXT: v_readlane_b32 s37, v23, 6 ; GFX8-NEXT: v_readlane_b32 s36, v23, 5 ; GFX8-NEXT: v_readlane_b32 s35, v23, 4 @@ -226,30 +170,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX900-NEXT: v_writelane_b32 v23, s35, 4 ; GFX900-NEXT: v_writelane_b32 v23, s36, 5 ; GFX900-NEXT: v_writelane_b32 v23, s37, 6 -; GFX900-NEXT: v_writelane_b32 v23, s38, 7 -; GFX900-NEXT: v_writelane_b32 v23, s39, 8 -; GFX900-NEXT: v_writelane_b32 v23, s40, 9 -; GFX900-NEXT: v_writelane_b32 v23, s41, 10 -; GFX900-NEXT: v_writelane_b32 v23, s42, 11 -; GFX900-NEXT: v_writelane_b32 v23, s43, 12 -; GFX900-NEXT: v_writelane_b32 v23, s44, 13 -; GFX900-NEXT: v_writelane_b32 v23, s45, 14 -; GFX900-NEXT: v_writelane_b32 v23, s46, 15 -; GFX900-NEXT: v_writelane_b32 v23, s47, 16 -; GFX900-NEXT: v_writelane_b32 v23, s48, 17 -; GFX900-NEXT: v_writelane_b32 v23, s49, 18 -; GFX900-NEXT: v_writelane_b32 v23, s50, 19 -; GFX900-NEXT: v_writelane_b32 v23, s51, 20 -; GFX900-NEXT: v_writelane_b32 v23, s52, 21 -; GFX900-NEXT: v_writelane_b32 v23, s53, 22 -; GFX900-NEXT: v_writelane_b32 v23, s54, 23 -; GFX900-NEXT: v_writelane_b32 v23, s55, 24 -; GFX900-NEXT: v_writelane_b32 v23, s56, 25 +; GFX900-NEXT: v_writelane_b32 v23, s46, 7 +; GFX900-NEXT: v_writelane_b32 v23, s47, 8 +; GFX900-NEXT: v_writelane_b32 v23, s48, 9 +; GFX900-NEXT: v_writelane_b32 v23, s49, 10 +; GFX900-NEXT: v_writelane_b32 v23, s50, 11 +; GFX900-NEXT: v_writelane_b32 v23, s51, 12 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX900-NEXT: v_writelane_b32 v23, s57, 26 +; GFX900-NEXT: v_writelane_b32 v23, s52, 13 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_writelane_b32 v23, s58, 27 +; GFX900-NEXT: v_writelane_b32 v23, s53, 14 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND @@ -259,35 +190,20 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 -; GFX900-NEXT: v_writelane_b32 v23, s59, 28 ; GFX900-NEXT: v_readfirstlane_b32 s59, v0 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s59, v23, 28 -; GFX900-NEXT: v_readlane_b32 s58, v23, 27 -; GFX900-NEXT: v_readlane_b32 s57, v23, 26 -; GFX900-NEXT: v_readlane_b32 s56, v23, 25 -; GFX900-NEXT: v_readlane_b32 s55, v23, 24 -; GFX900-NEXT: v_readlane_b32 s54, v23, 23 -; GFX900-NEXT: v_readlane_b32 s53, v23, 22 -; GFX900-NEXT: v_readlane_b32 s52, v23, 21 -; GFX900-NEXT: v_readlane_b32 s51, v23, 20 -; GFX900-NEXT: v_readlane_b32 s50, v23, 19 -; GFX900-NEXT: v_readlane_b32 s49, v23, 18 -; GFX900-NEXT: v_readlane_b32 s48, v23, 17 -; GFX900-NEXT: v_readlane_b32 s47, v23, 16 -; GFX900-NEXT: v_readlane_b32 s46, v23, 15 -; GFX900-NEXT: v_readlane_b32 s45, v23, 14 -; GFX900-NEXT: v_readlane_b32 s44, v23, 13 -; GFX900-NEXT: v_readlane_b32 s43, v23, 12 -; GFX900-NEXT: v_readlane_b32 s42, v23, 11 -; GFX900-NEXT: v_readlane_b32 s41, v23, 10 -; GFX900-NEXT: v_readlane_b32 s40, v23, 9 -; GFX900-NEXT: v_readlane_b32 s39, v23, 8 -; GFX900-NEXT: v_readlane_b32 s38, v23, 7 +; GFX900-NEXT: v_readlane_b32 s53, v23, 14 +; GFX900-NEXT: v_readlane_b32 s52, v23, 13 +; GFX900-NEXT: v_readlane_b32 s51, v23, 12 +; GFX900-NEXT: v_readlane_b32 s50, v23, 11 +; GFX900-NEXT: v_readlane_b32 s49, v23, 10 +; GFX900-NEXT: v_readlane_b32 s48, v23, 9 +; GFX900-NEXT: v_readlane_b32 s47, v23, 8 +; GFX900-NEXT: v_readlane_b32 s46, v23, 7 ; GFX900-NEXT: v_readlane_b32 s37, v23, 6 ; GFX900-NEXT: v_readlane_b32 s36, v23, 5 ; GFX900-NEXT: v_readlane_b32 s35, v23, 4 @@ -316,33 +232,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX942-NEXT: v_writelane_b32 v23, s35, 4 ; GFX942-NEXT: v_writelane_b32 v23, s36, 5 ; GFX942-NEXT: v_writelane_b32 v23, s37, 6 -; GFX942-NEXT: v_writelane_b32 v23, s38, 7 -; GFX942-NEXT: v_writelane_b32 v23, s39, 8 -; GFX942-NEXT: v_writelane_b32 v23, s40, 9 -; GFX942-NEXT: v_writelane_b32 v23, s41, 10 -; GFX942-NEXT: v_writelane_b32 v23, s42, 11 -; GFX942-NEXT: v_writelane_b32 v23, s43, 12 -; GFX942-NEXT: v_writelane_b32 v23, s44, 13 -; GFX942-NEXT: v_writelane_b32 v23, s45, 14 -; GFX942-NEXT: v_writelane_b32 v23, s46, 15 -; GFX942-NEXT: v_writelane_b32 v23, s47, 16 -; GFX942-NEXT: v_writelane_b32 v23, s48, 17 -; GFX942-NEXT: v_writelane_b32 v23, s49, 18 -; GFX942-NEXT: v_writelane_b32 v23, s50, 19 -; GFX942-NEXT: v_writelane_b32 v23, s51, 20 -; GFX942-NEXT: v_writelane_b32 v23, s52, 21 -; GFX942-NEXT: v_writelane_b32 v23, s53, 22 -; GFX942-NEXT: v_writelane_b32 v23, s54, 23 -; GFX942-NEXT: v_writelane_b32 v23, s55, 24 -; GFX942-NEXT: v_writelane_b32 v23, s56, 25 -; GFX942-NEXT: v_writelane_b32 v23, s57, 26 -; GFX942-NEXT: v_writelane_b32 v23, s58, 27 -; GFX942-NEXT: v_writelane_b32 v23, s59, 28 -; GFX942-NEXT: v_writelane_b32 v23, s60, 29 +; GFX942-NEXT: v_writelane_b32 v23, s46, 7 +; GFX942-NEXT: v_writelane_b32 v23, s47, 8 +; GFX942-NEXT: v_writelane_b32 v23, s48, 9 +; GFX942-NEXT: v_writelane_b32 v23, s49, 10 +; GFX942-NEXT: v_writelane_b32 v23, s50, 11 +; GFX942-NEXT: v_writelane_b32 v23, s51, 12 ; GFX942-NEXT: s_add_i32 s0, s32, 64 -; GFX942-NEXT: v_writelane_b32 v23, s61, 30 +; GFX942-NEXT: v_writelane_b32 v23, s52, 13 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec +; GFX942-NEXT: v_writelane_b32 v23, s53, 14 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND @@ -356,30 +256,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s61, v23, 30 -; GFX942-NEXT: v_readlane_b32 s60, v23, 29 -; GFX942-NEXT: v_readlane_b32 s59, v23, 28 -; GFX942-NEXT: v_readlane_b32 s58, v23, 27 -; GFX942-NEXT: v_readlane_b32 s57, v23, 26 -; GFX942-NEXT: v_readlane_b32 s56, v23, 25 -; GFX942-NEXT: v_readlane_b32 s55, v23, 24 -; GFX942-NEXT: v_readlane_b32 s54, v23, 23 -; GFX942-NEXT: v_readlane_b32 s53, v23, 22 -; GFX942-NEXT: v_readlane_b32 s52, v23, 21 -; GFX942-NEXT: v_readlane_b32 s51, v23, 20 -; GFX942-NEXT: v_readlane_b32 s50, v23, 19 -; GFX942-NEXT: v_readlane_b32 s49, v23, 18 -; GFX942-NEXT: v_readlane_b32 s48, v23, 17 -; GFX942-NEXT: v_readlane_b32 s47, v23, 16 -; GFX942-NEXT: v_readlane_b32 s46, v23, 15 -; GFX942-NEXT: v_readlane_b32 s45, v23, 14 -; GFX942-NEXT: v_readlane_b32 s44, v23, 13 -; GFX942-NEXT: v_readlane_b32 s43, v23, 12 -; GFX942-NEXT: v_readlane_b32 s42, v23, 11 -; GFX942-NEXT: v_readlane_b32 s41, v23, 10 -; GFX942-NEXT: v_readlane_b32 s40, v23, 9 -; GFX942-NEXT: v_readlane_b32 s39, v23, 8 -; GFX942-NEXT: v_readlane_b32 s38, v23, 7 +; GFX942-NEXT: v_readlane_b32 s53, v23, 14 +; GFX942-NEXT: v_readlane_b32 s52, v23, 13 +; GFX942-NEXT: v_readlane_b32 s51, v23, 12 +; GFX942-NEXT: v_readlane_b32 s50, v23, 11 +; GFX942-NEXT: v_readlane_b32 s49, v23, 10 +; GFX942-NEXT: v_readlane_b32 s48, v23, 9 +; GFX942-NEXT: v_readlane_b32 s47, v23, 8 +; GFX942-NEXT: v_readlane_b32 s46, v23, 7 ; GFX942-NEXT: v_readlane_b32 s37, v23, 6 ; GFX942-NEXT: v_readlane_b32 s36, v23, 5 ; GFX942-NEXT: v_readlane_b32 s35, v23, 4 @@ -415,59 +299,31 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_1-NEXT: v_writelane_b32 v23, s35, 4 ; GFX10_1-NEXT: v_writelane_b32 v23, s36, 5 ; GFX10_1-NEXT: v_writelane_b32 v23, s37, 6 -; GFX10_1-NEXT: v_writelane_b32 v23, s38, 7 -; GFX10_1-NEXT: v_writelane_b32 v23, s39, 8 -; GFX10_1-NEXT: v_writelane_b32 v23, s40, 9 -; GFX10_1-NEXT: v_writelane_b32 v23, s41, 10 -; GFX10_1-NEXT: v_writelane_b32 v23, s42, 11 -; GFX10_1-NEXT: v_writelane_b32 v23, s43, 12 -; GFX10_1-NEXT: v_writelane_b32 v23, s44, 13 -; GFX10_1-NEXT: v_writelane_b32 v23, s45, 14 -; GFX10_1-NEXT: v_writelane_b32 v23, s46, 15 -; GFX10_1-NEXT: v_writelane_b32 v23, s47, 16 -; GFX10_1-NEXT: v_writelane_b32 v23, s48, 17 -; GFX10_1-NEXT: v_writelane_b32 v23, s49, 18 -; GFX10_1-NEXT: v_writelane_b32 v23, s50, 19 -; GFX10_1-NEXT: v_writelane_b32 v23, s51, 20 -; GFX10_1-NEXT: v_writelane_b32 v23, s52, 21 -; GFX10_1-NEXT: v_writelane_b32 v23, s53, 22 -; GFX10_1-NEXT: v_writelane_b32 v23, s54, 23 -; GFX10_1-NEXT: v_writelane_b32 v23, s55, 24 -; GFX10_1-NEXT: v_writelane_b32 v23, s56, 25 -; GFX10_1-NEXT: v_writelane_b32 v23, s57, 26 -; GFX10_1-NEXT: v_writelane_b32 v23, s58, 27 +; GFX10_1-NEXT: v_writelane_b32 v23, s46, 7 +; GFX10_1-NEXT: v_writelane_b32 v23, s47, 8 +; GFX10_1-NEXT: v_writelane_b32 v23, s48, 9 +; GFX10_1-NEXT: v_writelane_b32 v23, s49, 10 +; GFX10_1-NEXT: v_writelane_b32 v23, s50, 11 +; GFX10_1-NEXT: v_writelane_b32 v23, s51, 12 +; GFX10_1-NEXT: v_writelane_b32 v23, s52, 13 +; GFX10_1-NEXT: v_writelane_b32 v23, s53, 14 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_lshrrev_b32_e64 v24, 5, s32 -; GFX10_1-NEXT: v_writelane_b32 v23, s59, 28 ; GFX10_1-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24 ; GFX10_1-NEXT: v_readfirstlane_b32 s59, v24 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s59, v23, 28 -; GFX10_1-NEXT: v_readlane_b32 s58, v23, 27 -; GFX10_1-NEXT: v_readlane_b32 s57, v23, 26 -; GFX10_1-NEXT: v_readlane_b32 s56, v23, 25 -; GFX10_1-NEXT: v_readlane_b32 s55, v23, 24 -; GFX10_1-NEXT: v_readlane_b32 s54, v23, 23 -; GFX10_1-NEXT: v_readlane_b32 s53, v23, 22 -; GFX10_1-NEXT: v_readlane_b32 s52, v23, 21 -; GFX10_1-NEXT: v_readlane_b32 s51, v23, 20 -; GFX10_1-NEXT: v_readlane_b32 s50, v23, 19 -; GFX10_1-NEXT: v_readlane_b32 s49, v23, 18 -; GFX10_1-NEXT: v_readlane_b32 s48, v23, 17 -; GFX10_1-NEXT: v_readlane_b32 s47, v23, 16 -; GFX10_1-NEXT: v_readlane_b32 s46, v23, 15 -; GFX10_1-NEXT: v_readlane_b32 s45, v23, 14 -; GFX10_1-NEXT: v_readlane_b32 s44, v23, 13 -; GFX10_1-NEXT: v_readlane_b32 s43, v23, 12 -; GFX10_1-NEXT: v_readlane_b32 s42, v23, 11 -; GFX10_1-NEXT: v_readlane_b32 s41, v23, 10 -; GFX10_1-NEXT: v_readlane_b32 s40, v23, 9 -; GFX10_1-NEXT: v_readlane_b32 s39, v23, 8 -; GFX10_1-NEXT: v_readlane_b32 s38, v23, 7 +; GFX10_1-NEXT: v_readlane_b32 s53, v23, 14 +; GFX10_1-NEXT: v_readlane_b32 s52, v23, 13 +; GFX10_1-NEXT: v_readlane_b32 s51, v23, 12 +; GFX10_1-NEXT: v_readlane_b32 s50, v23, 11 +; GFX10_1-NEXT: v_readlane_b32 s49, v23, 10 +; GFX10_1-NEXT: v_readlane_b32 s48, v23, 9 +; GFX10_1-NEXT: v_readlane_b32 s47, v23, 8 +; GFX10_1-NEXT: v_readlane_b32 s46, v23, 7 ; GFX10_1-NEXT: v_readlane_b32 s37, v23, 6 ; GFX10_1-NEXT: v_readlane_b32 s36, v23, 5 ; GFX10_1-NEXT: v_readlane_b32 s35, v23, 4 @@ -503,59 +359,31 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_3-NEXT: v_writelane_b32 v23, s35, 4 ; GFX10_3-NEXT: v_writelane_b32 v23, s36, 5 ; GFX10_3-NEXT: v_writelane_b32 v23, s37, 6 -; GFX10_3-NEXT: v_writelane_b32 v23, s38, 7 -; GFX10_3-NEXT: v_writelane_b32 v23, s39, 8 -; GFX10_3-NEXT: v_writelane_b32 v23, s40, 9 -; GFX10_3-NEXT: v_writelane_b32 v23, s41, 10 -; GFX10_3-NEXT: v_writelane_b32 v23, s42, 11 -; GFX10_3-NEXT: v_writelane_b32 v23, s43, 12 -; GFX10_3-NEXT: v_writelane_b32 v23, s44, 13 -; GFX10_3-NEXT: v_writelane_b32 v23, s45, 14 -; GFX10_3-NEXT: v_writelane_b32 v23, s46, 15 -; GFX10_3-NEXT: v_writelane_b32 v23, s47, 16 -; GFX10_3-NEXT: v_writelane_b32 v23, s48, 17 -; GFX10_3-NEXT: v_writelane_b32 v23, s49, 18 -; GFX10_3-NEXT: v_writelane_b32 v23, s50, 19 -; GFX10_3-NEXT: v_writelane_b32 v23, s51, 20 -; GFX10_3-NEXT: v_writelane_b32 v23, s52, 21 -; GFX10_3-NEXT: v_writelane_b32 v23, s53, 22 -; GFX10_3-NEXT: v_writelane_b32 v23, s54, 23 -; GFX10_3-NEXT: v_writelane_b32 v23, s55, 24 -; GFX10_3-NEXT: v_writelane_b32 v23, s56, 25 -; GFX10_3-NEXT: v_writelane_b32 v23, s57, 26 -; GFX10_3-NEXT: v_writelane_b32 v23, s58, 27 +; GFX10_3-NEXT: v_writelane_b32 v23, s46, 7 +; GFX10_3-NEXT: v_writelane_b32 v23, s47, 8 +; GFX10_3-NEXT: v_writelane_b32 v23, s48, 9 +; GFX10_3-NEXT: v_writelane_b32 v23, s49, 10 +; GFX10_3-NEXT: v_writelane_b32 v23, s50, 11 +; GFX10_3-NEXT: v_writelane_b32 v23, s51, 12 +; GFX10_3-NEXT: v_writelane_b32 v23, s52, 13 +; GFX10_3-NEXT: v_writelane_b32 v23, s53, 14 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_lshrrev_b32_e64 v24, 5, s32 -; GFX10_3-NEXT: v_writelane_b32 v23, s59, 28 ; GFX10_3-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24 ; GFX10_3-NEXT: v_readfirstlane_b32 s59, v24 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s59, v23, 28 -; GFX10_3-NEXT: v_readlane_b32 s58, v23, 27 -; GFX10_3-NEXT: v_readlane_b32 s57, v23, 26 -; GFX10_3-NEXT: v_readlane_b32 s56, v23, 25 -; GFX10_3-NEXT: v_readlane_b32 s55, v23, 24 -; GFX10_3-NEXT: v_readlane_b32 s54, v23, 23 -; GFX10_3-NEXT: v_readlane_b32 s53, v23, 22 -; GFX10_3-NEXT: v_readlane_b32 s52, v23, 21 -; GFX10_3-NEXT: v_readlane_b32 s51, v23, 20 -; GFX10_3-NEXT: v_readlane_b32 s50, v23, 19 -; GFX10_3-NEXT: v_readlane_b32 s49, v23, 18 -; GFX10_3-NEXT: v_readlane_b32 s48, v23, 17 -; GFX10_3-NEXT: v_readlane_b32 s47, v23, 16 -; GFX10_3-NEXT: v_readlane_b32 s46, v23, 15 -; GFX10_3-NEXT: v_readlane_b32 s45, v23, 14 -; GFX10_3-NEXT: v_readlane_b32 s44, v23, 13 -; GFX10_3-NEXT: v_readlane_b32 s43, v23, 12 -; GFX10_3-NEXT: v_readlane_b32 s42, v23, 11 -; GFX10_3-NEXT: v_readlane_b32 s41, v23, 10 -; GFX10_3-NEXT: v_readlane_b32 s40, v23, 9 -; GFX10_3-NEXT: v_readlane_b32 s39, v23, 8 -; GFX10_3-NEXT: v_readlane_b32 s38, v23, 7 +; GFX10_3-NEXT: v_readlane_b32 s53, v23, 14 +; GFX10_3-NEXT: v_readlane_b32 s52, v23, 13 +; GFX10_3-NEXT: v_readlane_b32 s51, v23, 12 +; GFX10_3-NEXT: v_readlane_b32 s50, v23, 11 +; GFX10_3-NEXT: v_readlane_b32 s49, v23, 10 +; GFX10_3-NEXT: v_readlane_b32 s48, v23, 9 +; GFX10_3-NEXT: v_readlane_b32 s47, v23, 8 +; GFX10_3-NEXT: v_readlane_b32 s46, v23, 7 ; GFX10_3-NEXT: v_readlane_b32 s37, v23, 6 ; GFX10_3-NEXT: v_readlane_b32 s36, v23, 5 ; GFX10_3-NEXT: v_readlane_b32 s35, v23, 4 @@ -591,65 +419,33 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX11-NEXT: v_writelane_b32 v23, s35, 4 ; GFX11-NEXT: v_writelane_b32 v23, s36, 5 ; GFX11-NEXT: v_writelane_b32 v23, s37, 6 -; GFX11-NEXT: v_writelane_b32 v23, s38, 7 -; GFX11-NEXT: v_writelane_b32 v23, s39, 8 -; GFX11-NEXT: v_writelane_b32 v23, s40, 9 -; GFX11-NEXT: v_writelane_b32 v23, s41, 10 -; GFX11-NEXT: v_writelane_b32 v23, s42, 11 -; GFX11-NEXT: v_writelane_b32 v23, s43, 12 -; GFX11-NEXT: v_writelane_b32 v23, s44, 13 -; GFX11-NEXT: v_writelane_b32 v23, s45, 14 -; GFX11-NEXT: v_writelane_b32 v23, s46, 15 -; GFX11-NEXT: v_writelane_b32 v23, s47, 16 -; GFX11-NEXT: v_writelane_b32 v23, s48, 17 -; GFX11-NEXT: v_writelane_b32 v23, s49, 18 -; GFX11-NEXT: v_writelane_b32 v23, s50, 19 -; GFX11-NEXT: v_writelane_b32 v23, s51, 20 -; GFX11-NEXT: v_writelane_b32 v23, s52, 21 -; GFX11-NEXT: v_writelane_b32 v23, s53, 22 -; GFX11-NEXT: v_writelane_b32 v23, s54, 23 -; GFX11-NEXT: v_writelane_b32 v23, s55, 24 -; GFX11-NEXT: v_writelane_b32 v23, s56, 25 -; GFX11-NEXT: v_writelane_b32 v23, s57, 26 -; GFX11-NEXT: v_writelane_b32 v23, s58, 27 +; GFX11-NEXT: v_writelane_b32 v23, s46, 7 +; GFX11-NEXT: v_writelane_b32 v23, s47, 8 +; GFX11-NEXT: v_writelane_b32 v23, s48, 9 +; GFX11-NEXT: v_writelane_b32 v23, s49, 10 +; GFX11-NEXT: v_writelane_b32 v23, s50, 11 +; GFX11-NEXT: v_writelane_b32 v23, s51, 12 +; GFX11-NEXT: v_writelane_b32 v23, s52, 13 +; GFX11-NEXT: v_writelane_b32 v23, s53, 14 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_addc_u32 s32, s32, 0x4040 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_bitcmp1_b32 s32, 0 -; GFX11-NEXT: v_writelane_b32 v23, s59, 28 -; GFX11-NEXT: s_bitset0_b32 s32, 0 -; GFX11-NEXT: s_mov_b32 s59, s32 -; GFX11-NEXT: s_addc_u32 s32, s32, 0xffffbfc0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_bitcmp1_b32 s32, 0 -; GFX11-NEXT: s_bitset0_b32 s32, 0 +; GFX11-NEXT: s_addc_u32 s60, s32, 0x4040 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_bitcmp1_b32 s60, 0 +; GFX11-NEXT: s_bitset0_b32 s60, 0 +; GFX11-NEXT: s_mov_b32 s59, s60 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s59, v23, 28 -; GFX11-NEXT: v_readlane_b32 s58, v23, 27 -; GFX11-NEXT: v_readlane_b32 s57, v23, 26 -; GFX11-NEXT: v_readlane_b32 s56, v23, 25 -; GFX11-NEXT: v_readlane_b32 s55, v23, 24 -; GFX11-NEXT: v_readlane_b32 s54, v23, 23 -; GFX11-NEXT: v_readlane_b32 s53, v23, 22 -; GFX11-NEXT: v_readlane_b32 s52, v23, 21 -; GFX11-NEXT: v_readlane_b32 s51, v23, 20 -; GFX11-NEXT: v_readlane_b32 s50, v23, 19 -; GFX11-NEXT: v_readlane_b32 s49, v23, 18 -; GFX11-NEXT: v_readlane_b32 s48, v23, 17 -; GFX11-NEXT: v_readlane_b32 s47, v23, 16 -; GFX11-NEXT: v_readlane_b32 s46, v23, 15 -; GFX11-NEXT: v_readlane_b32 s45, v23, 14 -; GFX11-NEXT: v_readlane_b32 s44, v23, 13 -; GFX11-NEXT: v_readlane_b32 s43, v23, 12 -; GFX11-NEXT: v_readlane_b32 s42, v23, 11 -; GFX11-NEXT: v_readlane_b32 s41, v23, 10 -; GFX11-NEXT: v_readlane_b32 s40, v23, 9 -; GFX11-NEXT: v_readlane_b32 s39, v23, 8 -; GFX11-NEXT: v_readlane_b32 s38, v23, 7 +; GFX11-NEXT: v_readlane_b32 s53, v23, 14 +; GFX11-NEXT: v_readlane_b32 s52, v23, 13 +; GFX11-NEXT: v_readlane_b32 s51, v23, 12 +; GFX11-NEXT: v_readlane_b32 s50, v23, 11 +; GFX11-NEXT: v_readlane_b32 s49, v23, 10 +; GFX11-NEXT: v_readlane_b32 s48, v23, 9 +; GFX11-NEXT: v_readlane_b32 s47, v23, 8 +; GFX11-NEXT: v_readlane_b32 s46, v23, 7 ; GFX11-NEXT: v_readlane_b32 s37, v23, 6 ; GFX11-NEXT: v_readlane_b32 s36, v23, 5 ; GFX11-NEXT: v_readlane_b32 s35, v23, 4 @@ -687,66 +483,34 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX12-NEXT: v_writelane_b32 v23, s35, 4 ; GFX12-NEXT: v_writelane_b32 v23, s36, 5 ; GFX12-NEXT: v_writelane_b32 v23, s37, 6 -; GFX12-NEXT: v_writelane_b32 v23, s38, 7 -; GFX12-NEXT: v_writelane_b32 v23, s39, 8 -; GFX12-NEXT: v_writelane_b32 v23, s40, 9 -; GFX12-NEXT: v_writelane_b32 v23, s41, 10 -; GFX12-NEXT: v_writelane_b32 v23, s42, 11 -; GFX12-NEXT: v_writelane_b32 v23, s43, 12 -; GFX12-NEXT: v_writelane_b32 v23, s44, 13 -; GFX12-NEXT: v_writelane_b32 v23, s45, 14 -; GFX12-NEXT: v_writelane_b32 v23, s46, 15 -; GFX12-NEXT: v_writelane_b32 v23, s47, 16 -; GFX12-NEXT: v_writelane_b32 v23, s48, 17 -; GFX12-NEXT: v_writelane_b32 v23, s49, 18 -; GFX12-NEXT: v_writelane_b32 v23, s50, 19 -; GFX12-NEXT: v_writelane_b32 v23, s51, 20 -; GFX12-NEXT: v_writelane_b32 v23, s52, 21 -; GFX12-NEXT: v_writelane_b32 v23, s53, 22 -; GFX12-NEXT: v_writelane_b32 v23, s54, 23 -; GFX12-NEXT: v_writelane_b32 v23, s55, 24 -; GFX12-NEXT: v_writelane_b32 v23, s56, 25 -; GFX12-NEXT: v_writelane_b32 v23, s57, 26 -; GFX12-NEXT: v_writelane_b32 v23, s58, 27 +; GFX12-NEXT: v_writelane_b32 v23, s46, 7 +; GFX12-NEXT: v_writelane_b32 v23, s47, 8 +; GFX12-NEXT: v_writelane_b32 v23, s48, 9 +; GFX12-NEXT: v_writelane_b32 v23, s49, 10 +; GFX12-NEXT: v_writelane_b32 v23, s50, 11 +; GFX12-NEXT: v_writelane_b32 v23, s51, 12 +; GFX12-NEXT: v_writelane_b32 v23, s52, 13 +; GFX12-NEXT: v_writelane_b32 v23, s53, 14 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: s_add_co_ci_u32 s32, s32, 0x4000 +; GFX12-NEXT: s_add_co_ci_u32 s60, s32, 0x4000 ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_bitcmp1_b32 s32, 0 -; GFX12-NEXT: v_writelane_b32 v23, s59, 28 -; GFX12-NEXT: s_bitset0_b32 s32, 0 +; GFX12-NEXT: s_bitcmp1_b32 s60, 0 +; GFX12-NEXT: s_bitset0_b32 s60, 0 ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 s59, s32 -; GFX12-NEXT: s_add_co_ci_u32 s32, s32, 0xffffc000 -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_bitcmp1_b32 s32, 0 -; GFX12-NEXT: s_bitset0_b32 s32, 0 +; GFX12-NEXT: s_mov_b32 s59, s60 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_readlane_b32 s59, v23, 28 -; GFX12-NEXT: v_readlane_b32 s58, v23, 27 -; GFX12-NEXT: v_readlane_b32 s57, v23, 26 -; GFX12-NEXT: v_readlane_b32 s56, v23, 25 -; GFX12-NEXT: v_readlane_b32 s55, v23, 24 -; GFX12-NEXT: v_readlane_b32 s54, v23, 23 -; GFX12-NEXT: v_readlane_b32 s53, v23, 22 -; GFX12-NEXT: v_readlane_b32 s52, v23, 21 -; GFX12-NEXT: v_readlane_b32 s51, v23, 20 -; GFX12-NEXT: v_readlane_b32 s50, v23, 19 -; GFX12-NEXT: v_readlane_b32 s49, v23, 18 -; GFX12-NEXT: v_readlane_b32 s48, v23, 17 -; GFX12-NEXT: v_readlane_b32 s47, v23, 16 -; GFX12-NEXT: v_readlane_b32 s46, v23, 15 -; GFX12-NEXT: v_readlane_b32 s45, v23, 14 -; GFX12-NEXT: v_readlane_b32 s44, v23, 13 -; GFX12-NEXT: v_readlane_b32 s43, v23, 12 -; GFX12-NEXT: v_readlane_b32 s42, v23, 11 -; GFX12-NEXT: v_readlane_b32 s41, v23, 10 -; GFX12-NEXT: v_readlane_b32 s40, v23, 9 -; GFX12-NEXT: v_readlane_b32 s39, v23, 8 -; GFX12-NEXT: v_readlane_b32 s38, v23, 7 +; GFX12-NEXT: v_readlane_b32 s53, v23, 14 +; GFX12-NEXT: v_readlane_b32 s52, v23, 13 +; GFX12-NEXT: v_readlane_b32 s51, v23, 12 +; GFX12-NEXT: v_readlane_b32 s50, v23, 11 +; GFX12-NEXT: v_readlane_b32 s49, v23, 10 +; GFX12-NEXT: v_readlane_b32 s48, v23, 9 +; GFX12-NEXT: v_readlane_b32 s47, v23, 8 +; GFX12-NEXT: v_readlane_b32 s46, v23, 7 ; GFX12-NEXT: v_readlane_b32 s37, v23, 6 ; GFX12-NEXT: v_readlane_b32 s36, v23, 5 ; GFX12-NEXT: v_readlane_b32 s35, v23, 4 @@ -817,60 +581,32 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX7-NEXT: v_writelane_b32 v21, s35, 4 ; GFX7-NEXT: v_writelane_b32 v21, s36, 5 ; GFX7-NEXT: v_writelane_b32 v21, s37, 6 -; GFX7-NEXT: v_writelane_b32 v21, s38, 7 -; GFX7-NEXT: v_writelane_b32 v21, s39, 8 -; GFX7-NEXT: v_writelane_b32 v21, s40, 9 -; GFX7-NEXT: v_writelane_b32 v21, s41, 10 -; GFX7-NEXT: v_writelane_b32 v21, s42, 11 -; GFX7-NEXT: v_writelane_b32 v21, s43, 12 -; GFX7-NEXT: v_writelane_b32 v21, s44, 13 -; GFX7-NEXT: v_writelane_b32 v21, s45, 14 -; GFX7-NEXT: v_writelane_b32 v21, s46, 15 -; GFX7-NEXT: v_writelane_b32 v21, s47, 16 -; GFX7-NEXT: v_writelane_b32 v21, s48, 17 -; GFX7-NEXT: v_writelane_b32 v21, s49, 18 -; GFX7-NEXT: v_writelane_b32 v21, s50, 19 -; GFX7-NEXT: v_writelane_b32 v21, s51, 20 -; GFX7-NEXT: v_writelane_b32 v21, s52, 21 -; GFX7-NEXT: v_writelane_b32 v21, s53, 22 -; GFX7-NEXT: v_writelane_b32 v21, s54, 23 -; GFX7-NEXT: v_writelane_b32 v21, s55, 24 -; GFX7-NEXT: v_writelane_b32 v21, s56, 25 -; GFX7-NEXT: v_writelane_b32 v21, s57, 26 +; GFX7-NEXT: v_writelane_b32 v21, s46, 7 +; GFX7-NEXT: v_writelane_b32 v21, s47, 8 +; GFX7-NEXT: v_writelane_b32 v21, s48, 9 +; GFX7-NEXT: v_writelane_b32 v21, s49, 10 +; GFX7-NEXT: v_writelane_b32 v21, s50, 11 +; GFX7-NEXT: v_writelane_b32 v21, s51, 12 +; GFX7-NEXT: v_writelane_b32 v21, s52, 13 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec -; GFX7-NEXT: v_writelane_b32 v21, s58, 27 +; GFX7-NEXT: v_writelane_b32 v21, s53, 14 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX7-NEXT: ;;#ASMEND ; GFX7-NEXT: v_mad_u32_u24 v22, 16, 64, s32 ; GFX7-NEXT: v_lshrrev_b32_e32 v22, 6, v22 -; GFX7-NEXT: v_writelane_b32 v21, s59, 28 ; GFX7-NEXT: v_readfirstlane_b32 s59, v22 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX7-NEXT: ;;#ASMEND -; GFX7-NEXT: v_readlane_b32 s59, v21, 28 -; GFX7-NEXT: v_readlane_b32 s58, v21, 27 -; GFX7-NEXT: v_readlane_b32 s57, v21, 26 -; GFX7-NEXT: v_readlane_b32 s56, v21, 25 -; GFX7-NEXT: v_readlane_b32 s55, v21, 24 -; GFX7-NEXT: v_readlane_b32 s54, v21, 23 -; GFX7-NEXT: v_readlane_b32 s53, v21, 22 -; GFX7-NEXT: v_readlane_b32 s52, v21, 21 -; GFX7-NEXT: v_readlane_b32 s51, v21, 20 -; GFX7-NEXT: v_readlane_b32 s50, v21, 19 -; GFX7-NEXT: v_readlane_b32 s49, v21, 18 -; GFX7-NEXT: v_readlane_b32 s48, v21, 17 -; GFX7-NEXT: v_readlane_b32 s47, v21, 16 -; GFX7-NEXT: v_readlane_b32 s46, v21, 15 -; GFX7-NEXT: v_readlane_b32 s45, v21, 14 -; GFX7-NEXT: v_readlane_b32 s44, v21, 13 -; GFX7-NEXT: v_readlane_b32 s43, v21, 12 -; GFX7-NEXT: v_readlane_b32 s42, v21, 11 -; GFX7-NEXT: v_readlane_b32 s41, v21, 10 -; GFX7-NEXT: v_readlane_b32 s40, v21, 9 -; GFX7-NEXT: v_readlane_b32 s39, v21, 8 -; GFX7-NEXT: v_readlane_b32 s38, v21, 7 +; GFX7-NEXT: v_readlane_b32 s53, v21, 14 +; GFX7-NEXT: v_readlane_b32 s52, v21, 13 +; GFX7-NEXT: v_readlane_b32 s51, v21, 12 +; GFX7-NEXT: v_readlane_b32 s50, v21, 11 +; GFX7-NEXT: v_readlane_b32 s49, v21, 10 +; GFX7-NEXT: v_readlane_b32 s48, v21, 9 +; GFX7-NEXT: v_readlane_b32 s47, v21, 8 +; GFX7-NEXT: v_readlane_b32 s46, v21, 7 ; GFX7-NEXT: v_readlane_b32 s37, v21, 6 ; GFX7-NEXT: v_readlane_b32 s36, v21, 5 ; GFX7-NEXT: v_readlane_b32 s35, v21, 4 @@ -899,60 +635,32 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX8-NEXT: v_writelane_b32 v21, s35, 4 ; GFX8-NEXT: v_writelane_b32 v21, s36, 5 ; GFX8-NEXT: v_writelane_b32 v21, s37, 6 -; GFX8-NEXT: v_writelane_b32 v21, s38, 7 -; GFX8-NEXT: v_writelane_b32 v21, s39, 8 -; GFX8-NEXT: v_writelane_b32 v21, s40, 9 -; GFX8-NEXT: v_writelane_b32 v21, s41, 10 -; GFX8-NEXT: v_writelane_b32 v21, s42, 11 -; GFX8-NEXT: v_writelane_b32 v21, s43, 12 -; GFX8-NEXT: v_writelane_b32 v21, s44, 13 -; GFX8-NEXT: v_writelane_b32 v21, s45, 14 -; GFX8-NEXT: v_writelane_b32 v21, s46, 15 -; GFX8-NEXT: v_writelane_b32 v21, s47, 16 -; GFX8-NEXT: v_writelane_b32 v21, s48, 17 -; GFX8-NEXT: v_writelane_b32 v21, s49, 18 -; GFX8-NEXT: v_writelane_b32 v21, s50, 19 -; GFX8-NEXT: v_writelane_b32 v21, s51, 20 -; GFX8-NEXT: v_writelane_b32 v21, s52, 21 -; GFX8-NEXT: v_writelane_b32 v21, s53, 22 -; GFX8-NEXT: v_writelane_b32 v21, s54, 23 -; GFX8-NEXT: v_writelane_b32 v21, s55, 24 -; GFX8-NEXT: v_writelane_b32 v21, s56, 25 -; GFX8-NEXT: v_writelane_b32 v21, s57, 26 +; GFX8-NEXT: v_writelane_b32 v21, s46, 7 +; GFX8-NEXT: v_writelane_b32 v21, s47, 8 +; GFX8-NEXT: v_writelane_b32 v21, s48, 9 +; GFX8-NEXT: v_writelane_b32 v21, s49, 10 +; GFX8-NEXT: v_writelane_b32 v21, s50, 11 +; GFX8-NEXT: v_writelane_b32 v21, s51, 12 +; GFX8-NEXT: v_writelane_b32 v21, s52, 13 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_writelane_b32 v21, s58, 27 +; GFX8-NEXT: v_writelane_b32 v21, s53, 14 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_mad_u32_u24 v22, 16, 64, s32 ; GFX8-NEXT: v_lshrrev_b32_e32 v22, 6, v22 -; GFX8-NEXT: v_writelane_b32 v21, s59, 28 ; GFX8-NEXT: v_readfirstlane_b32 s59, v22 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s59, v21, 28 -; GFX8-NEXT: v_readlane_b32 s58, v21, 27 -; GFX8-NEXT: v_readlane_b32 s57, v21, 26 -; GFX8-NEXT: v_readlane_b32 s56, v21, 25 -; GFX8-NEXT: v_readlane_b32 s55, v21, 24 -; GFX8-NEXT: v_readlane_b32 s54, v21, 23 -; GFX8-NEXT: v_readlane_b32 s53, v21, 22 -; GFX8-NEXT: v_readlane_b32 s52, v21, 21 -; GFX8-NEXT: v_readlane_b32 s51, v21, 20 -; GFX8-NEXT: v_readlane_b32 s50, v21, 19 -; GFX8-NEXT: v_readlane_b32 s49, v21, 18 -; GFX8-NEXT: v_readlane_b32 s48, v21, 17 -; GFX8-NEXT: v_readlane_b32 s47, v21, 16 -; GFX8-NEXT: v_readlane_b32 s46, v21, 15 -; GFX8-NEXT: v_readlane_b32 s45, v21, 14 -; GFX8-NEXT: v_readlane_b32 s44, v21, 13 -; GFX8-NEXT: v_readlane_b32 s43, v21, 12 -; GFX8-NEXT: v_readlane_b32 s42, v21, 11 -; GFX8-NEXT: v_readlane_b32 s41, v21, 10 -; GFX8-NEXT: v_readlane_b32 s40, v21, 9 -; GFX8-NEXT: v_readlane_b32 s39, v21, 8 -; GFX8-NEXT: v_readlane_b32 s38, v21, 7 +; GFX8-NEXT: v_readlane_b32 s53, v21, 14 +; GFX8-NEXT: v_readlane_b32 s52, v21, 13 +; GFX8-NEXT: v_readlane_b32 s51, v21, 12 +; GFX8-NEXT: v_readlane_b32 s50, v21, 11 +; GFX8-NEXT: v_readlane_b32 s49, v21, 10 +; GFX8-NEXT: v_readlane_b32 s48, v21, 9 +; GFX8-NEXT: v_readlane_b32 s47, v21, 8 +; GFX8-NEXT: v_readlane_b32 s46, v21, 7 ; GFX8-NEXT: v_readlane_b32 s37, v21, 6 ; GFX8-NEXT: v_readlane_b32 s36, v21, 5 ; GFX8-NEXT: v_readlane_b32 s35, v21, 4 @@ -981,60 +689,32 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX900-NEXT: v_writelane_b32 v21, s35, 4 ; GFX900-NEXT: v_writelane_b32 v21, s36, 5 ; GFX900-NEXT: v_writelane_b32 v21, s37, 6 -; GFX900-NEXT: v_writelane_b32 v21, s38, 7 -; GFX900-NEXT: v_writelane_b32 v21, s39, 8 -; GFX900-NEXT: v_writelane_b32 v21, s40, 9 -; GFX900-NEXT: v_writelane_b32 v21, s41, 10 -; GFX900-NEXT: v_writelane_b32 v21, s42, 11 -; GFX900-NEXT: v_writelane_b32 v21, s43, 12 -; GFX900-NEXT: v_writelane_b32 v21, s44, 13 -; GFX900-NEXT: v_writelane_b32 v21, s45, 14 -; GFX900-NEXT: v_writelane_b32 v21, s46, 15 -; GFX900-NEXT: v_writelane_b32 v21, s47, 16 -; GFX900-NEXT: v_writelane_b32 v21, s48, 17 -; GFX900-NEXT: v_writelane_b32 v21, s49, 18 -; GFX900-NEXT: v_writelane_b32 v21, s50, 19 -; GFX900-NEXT: v_writelane_b32 v21, s51, 20 -; GFX900-NEXT: v_writelane_b32 v21, s52, 21 -; GFX900-NEXT: v_writelane_b32 v21, s53, 22 -; GFX900-NEXT: v_writelane_b32 v21, s54, 23 -; GFX900-NEXT: v_writelane_b32 v21, s55, 24 -; GFX900-NEXT: v_writelane_b32 v21, s56, 25 -; GFX900-NEXT: v_writelane_b32 v21, s57, 26 +; GFX900-NEXT: v_writelane_b32 v21, s46, 7 +; GFX900-NEXT: v_writelane_b32 v21, s47, 8 +; GFX900-NEXT: v_writelane_b32 v21, s48, 9 +; GFX900-NEXT: v_writelane_b32 v21, s49, 10 +; GFX900-NEXT: v_writelane_b32 v21, s50, 11 +; GFX900-NEXT: v_writelane_b32 v21, s51, 12 +; GFX900-NEXT: v_writelane_b32 v21, s52, 13 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_writelane_b32 v21, s58, 27 +; GFX900-NEXT: v_writelane_b32 v21, s53, 14 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_lshrrev_b32_e64 v22, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v22, 16, v22 -; GFX900-NEXT: v_writelane_b32 v21, s59, 28 ; GFX900-NEXT: v_readfirstlane_b32 s59, v22 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s59, v21, 28 -; GFX900-NEXT: v_readlane_b32 s58, v21, 27 -; GFX900-NEXT: v_readlane_b32 s57, v21, 26 -; GFX900-NEXT: v_readlane_b32 s56, v21, 25 -; GFX900-NEXT: v_readlane_b32 s55, v21, 24 -; GFX900-NEXT: v_readlane_b32 s54, v21, 23 -; GFX900-NEXT: v_readlane_b32 s53, v21, 22 -; GFX900-NEXT: v_readlane_b32 s52, v21, 21 -; GFX900-NEXT: v_readlane_b32 s51, v21, 20 -; GFX900-NEXT: v_readlane_b32 s50, v21, 19 -; GFX900-NEXT: v_readlane_b32 s49, v21, 18 -; GFX900-NEXT: v_readlane_b32 s48, v21, 17 -; GFX900-NEXT: v_readlane_b32 s47, v21, 16 -; GFX900-NEXT: v_readlane_b32 s46, v21, 15 -; GFX900-NEXT: v_readlane_b32 s45, v21, 14 -; GFX900-NEXT: v_readlane_b32 s44, v21, 13 -; GFX900-NEXT: v_readlane_b32 s43, v21, 12 -; GFX900-NEXT: v_readlane_b32 s42, v21, 11 -; GFX900-NEXT: v_readlane_b32 s41, v21, 10 -; GFX900-NEXT: v_readlane_b32 s40, v21, 9 -; GFX900-NEXT: v_readlane_b32 s39, v21, 8 -; GFX900-NEXT: v_readlane_b32 s38, v21, 7 +; GFX900-NEXT: v_readlane_b32 s53, v21, 14 +; GFX900-NEXT: v_readlane_b32 s52, v21, 13 +; GFX900-NEXT: v_readlane_b32 s51, v21, 12 +; GFX900-NEXT: v_readlane_b32 s50, v21, 11 +; GFX900-NEXT: v_readlane_b32 s49, v21, 10 +; GFX900-NEXT: v_readlane_b32 s48, v21, 9 +; GFX900-NEXT: v_readlane_b32 s47, v21, 8 +; GFX900-NEXT: v_readlane_b32 s46, v21, 7 ; GFX900-NEXT: v_readlane_b32 s37, v21, 6 ; GFX900-NEXT: v_readlane_b32 s36, v21, 5 ; GFX900-NEXT: v_readlane_b32 s35, v21, 4 @@ -1063,31 +743,15 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX942-NEXT: v_writelane_b32 v21, s35, 4 ; GFX942-NEXT: v_writelane_b32 v21, s36, 5 ; GFX942-NEXT: v_writelane_b32 v21, s37, 6 -; GFX942-NEXT: v_writelane_b32 v21, s38, 7 -; GFX942-NEXT: v_writelane_b32 v21, s39, 8 -; GFX942-NEXT: v_writelane_b32 v21, s40, 9 -; GFX942-NEXT: v_writelane_b32 v21, s41, 10 -; GFX942-NEXT: v_writelane_b32 v21, s42, 11 -; GFX942-NEXT: v_writelane_b32 v21, s43, 12 -; GFX942-NEXT: v_writelane_b32 v21, s44, 13 -; GFX942-NEXT: v_writelane_b32 v21, s45, 14 -; GFX942-NEXT: v_writelane_b32 v21, s46, 15 -; GFX942-NEXT: v_writelane_b32 v21, s47, 16 -; GFX942-NEXT: v_writelane_b32 v21, s48, 17 -; GFX942-NEXT: v_writelane_b32 v21, s49, 18 -; GFX942-NEXT: v_writelane_b32 v21, s50, 19 -; GFX942-NEXT: v_writelane_b32 v21, s51, 20 -; GFX942-NEXT: v_writelane_b32 v21, s52, 21 -; GFX942-NEXT: v_writelane_b32 v21, s53, 22 -; GFX942-NEXT: v_writelane_b32 v21, s54, 23 -; GFX942-NEXT: v_writelane_b32 v21, s55, 24 -; GFX942-NEXT: v_writelane_b32 v21, s56, 25 -; GFX942-NEXT: v_writelane_b32 v21, s57, 26 -; GFX942-NEXT: v_writelane_b32 v21, s58, 27 -; GFX942-NEXT: v_writelane_b32 v21, s59, 28 -; GFX942-NEXT: v_writelane_b32 v21, s60, 29 -; GFX942-NEXT: v_writelane_b32 v21, s61, 30 +; GFX942-NEXT: v_writelane_b32 v21, s46, 7 +; GFX942-NEXT: v_writelane_b32 v21, s47, 8 +; GFX942-NEXT: v_writelane_b32 v21, s48, 9 +; GFX942-NEXT: v_writelane_b32 v21, s49, 10 +; GFX942-NEXT: v_writelane_b32 v21, s50, 11 +; GFX942-NEXT: v_writelane_b32 v21, s51, 12 +; GFX942-NEXT: v_writelane_b32 v21, s52, 13 ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec +; GFX942-NEXT: v_writelane_b32 v21, s53, 14 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX942-NEXT: ;;#ASMEND @@ -1098,30 +762,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s61, v21, 30 -; GFX942-NEXT: v_readlane_b32 s60, v21, 29 -; GFX942-NEXT: v_readlane_b32 s59, v21, 28 -; GFX942-NEXT: v_readlane_b32 s58, v21, 27 -; GFX942-NEXT: v_readlane_b32 s57, v21, 26 -; GFX942-NEXT: v_readlane_b32 s56, v21, 25 -; GFX942-NEXT: v_readlane_b32 s55, v21, 24 -; GFX942-NEXT: v_readlane_b32 s54, v21, 23 -; GFX942-NEXT: v_readlane_b32 s53, v21, 22 -; GFX942-NEXT: v_readlane_b32 s52, v21, 21 -; GFX942-NEXT: v_readlane_b32 s51, v21, 20 -; GFX942-NEXT: v_readlane_b32 s50, v21, 19 -; GFX942-NEXT: v_readlane_b32 s49, v21, 18 -; GFX942-NEXT: v_readlane_b32 s48, v21, 17 -; GFX942-NEXT: v_readlane_b32 s47, v21, 16 -; GFX942-NEXT: v_readlane_b32 s46, v21, 15 -; GFX942-NEXT: v_readlane_b32 s45, v21, 14 -; GFX942-NEXT: v_readlane_b32 s44, v21, 13 -; GFX942-NEXT: v_readlane_b32 s43, v21, 12 -; GFX942-NEXT: v_readlane_b32 s42, v21, 11 -; GFX942-NEXT: v_readlane_b32 s41, v21, 10 -; GFX942-NEXT: v_readlane_b32 s40, v21, 9 -; GFX942-NEXT: v_readlane_b32 s39, v21, 8 -; GFX942-NEXT: v_readlane_b32 s38, v21, 7 +; GFX942-NEXT: v_readlane_b32 s53, v21, 14 +; GFX942-NEXT: v_readlane_b32 s52, v21, 13 +; GFX942-NEXT: v_readlane_b32 s51, v21, 12 +; GFX942-NEXT: v_readlane_b32 s50, v21, 11 +; GFX942-NEXT: v_readlane_b32 s49, v21, 10 +; GFX942-NEXT: v_readlane_b32 s48, v21, 9 +; GFX942-NEXT: v_readlane_b32 s47, v21, 8 +; GFX942-NEXT: v_readlane_b32 s46, v21, 7 ; GFX942-NEXT: v_readlane_b32 s37, v21, 6 ; GFX942-NEXT: v_readlane_b32 s36, v21, 5 ; GFX942-NEXT: v_readlane_b32 s35, v21, 4 @@ -1145,66 +793,38 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: v_writelane_b32 v21, s30, 0 +; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo ; GFX10_1-NEXT: v_writelane_b32 v21, s31, 1 ; GFX10_1-NEXT: v_writelane_b32 v21, s33, 2 ; GFX10_1-NEXT: v_writelane_b32 v21, s34, 3 ; GFX10_1-NEXT: v_writelane_b32 v21, s35, 4 ; GFX10_1-NEXT: v_writelane_b32 v21, s36, 5 ; GFX10_1-NEXT: v_writelane_b32 v21, s37, 6 -; GFX10_1-NEXT: v_writelane_b32 v21, s38, 7 -; GFX10_1-NEXT: v_writelane_b32 v21, s39, 8 -; GFX10_1-NEXT: v_writelane_b32 v21, s40, 9 -; GFX10_1-NEXT: v_writelane_b32 v21, s41, 10 -; GFX10_1-NEXT: v_writelane_b32 v21, s42, 11 -; GFX10_1-NEXT: v_writelane_b32 v21, s43, 12 -; GFX10_1-NEXT: v_writelane_b32 v21, s44, 13 -; GFX10_1-NEXT: v_writelane_b32 v21, s45, 14 -; GFX10_1-NEXT: v_writelane_b32 v21, s46, 15 -; GFX10_1-NEXT: v_writelane_b32 v21, s47, 16 -; GFX10_1-NEXT: v_writelane_b32 v21, s48, 17 -; GFX10_1-NEXT: v_writelane_b32 v21, s49, 18 -; GFX10_1-NEXT: v_writelane_b32 v21, s50, 19 -; GFX10_1-NEXT: v_writelane_b32 v21, s51, 20 -; GFX10_1-NEXT: v_writelane_b32 v21, s52, 21 -; GFX10_1-NEXT: v_writelane_b32 v21, s53, 22 -; GFX10_1-NEXT: v_writelane_b32 v21, s54, 23 -; GFX10_1-NEXT: v_writelane_b32 v21, s55, 24 -; GFX10_1-NEXT: v_writelane_b32 v21, s56, 25 -; GFX10_1-NEXT: v_writelane_b32 v21, s57, 26 -; GFX10_1-NEXT: v_writelane_b32 v21, s58, 27 +; GFX10_1-NEXT: v_writelane_b32 v21, s46, 7 +; GFX10_1-NEXT: v_writelane_b32 v21, s47, 8 +; GFX10_1-NEXT: v_writelane_b32 v21, s48, 9 +; GFX10_1-NEXT: v_writelane_b32 v21, s49, 10 +; GFX10_1-NEXT: v_writelane_b32 v21, s50, 11 +; GFX10_1-NEXT: v_writelane_b32 v21, s51, 12 +; GFX10_1-NEXT: v_writelane_b32 v21, s52, 13 +; GFX10_1-NEXT: v_writelane_b32 v21, s53, 14 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_lshrrev_b32_e64 v22, 5, s32 -; GFX10_1-NEXT: v_writelane_b32 v21, s59, 28 -; GFX10_1-NEXT: s_and_b32 s59, 0, exec_lo ; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 16, v22 ; GFX10_1-NEXT: v_readfirstlane_b32 s59, v22 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s59, v21, 28 -; GFX10_1-NEXT: v_readlane_b32 s58, v21, 27 -; GFX10_1-NEXT: v_readlane_b32 s57, v21, 26 -; GFX10_1-NEXT: v_readlane_b32 s56, v21, 25 -; GFX10_1-NEXT: v_readlane_b32 s55, v21, 24 -; GFX10_1-NEXT: v_readlane_b32 s54, v21, 23 -; GFX10_1-NEXT: v_readlane_b32 s53, v21, 22 -; GFX10_1-NEXT: v_readlane_b32 s52, v21, 21 -; GFX10_1-NEXT: v_readlane_b32 s51, v21, 20 -; GFX10_1-NEXT: v_readlane_b32 s50, v21, 19 -; GFX10_1-NEXT: v_readlane_b32 s49, v21, 18 -; GFX10_1-NEXT: v_readlane_b32 s48, v21, 17 -; GFX10_1-NEXT: v_readlane_b32 s47, v21, 16 -; GFX10_1-NEXT: v_readlane_b32 s46, v21, 15 -; GFX10_1-NEXT: v_readlane_b32 s45, v21, 14 -; GFX10_1-NEXT: v_readlane_b32 s44, v21, 13 -; GFX10_1-NEXT: v_readlane_b32 s43, v21, 12 -; GFX10_1-NEXT: v_readlane_b32 s42, v21, 11 -; GFX10_1-NEXT: v_readlane_b32 s41, v21, 10 -; GFX10_1-NEXT: v_readlane_b32 s40, v21, 9 -; GFX10_1-NEXT: v_readlane_b32 s39, v21, 8 -; GFX10_1-NEXT: v_readlane_b32 s38, v21, 7 +; GFX10_1-NEXT: v_readlane_b32 s53, v21, 14 +; GFX10_1-NEXT: v_readlane_b32 s52, v21, 13 +; GFX10_1-NEXT: v_readlane_b32 s51, v21, 12 +; GFX10_1-NEXT: v_readlane_b32 s50, v21, 11 +; GFX10_1-NEXT: v_readlane_b32 s49, v21, 10 +; GFX10_1-NEXT: v_readlane_b32 s48, v21, 9 +; GFX10_1-NEXT: v_readlane_b32 s47, v21, 8 +; GFX10_1-NEXT: v_readlane_b32 s46, v21, 7 ; GFX10_1-NEXT: v_readlane_b32 s37, v21, 6 ; GFX10_1-NEXT: v_readlane_b32 s36, v21, 5 ; GFX10_1-NEXT: v_readlane_b32 s35, v21, 4 @@ -1228,66 +848,38 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX10_3-NEXT: buffer_store_dword v21, off, s[0:3], s5 ; 4-byte Folded Spill ; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: v_writelane_b32 v21, s30, 0 +; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo ; GFX10_3-NEXT: v_writelane_b32 v21, s31, 1 ; GFX10_3-NEXT: v_writelane_b32 v21, s33, 2 ; GFX10_3-NEXT: v_writelane_b32 v21, s34, 3 ; GFX10_3-NEXT: v_writelane_b32 v21, s35, 4 ; GFX10_3-NEXT: v_writelane_b32 v21, s36, 5 ; GFX10_3-NEXT: v_writelane_b32 v21, s37, 6 -; GFX10_3-NEXT: v_writelane_b32 v21, s38, 7 -; GFX10_3-NEXT: v_writelane_b32 v21, s39, 8 -; GFX10_3-NEXT: v_writelane_b32 v21, s40, 9 -; GFX10_3-NEXT: v_writelane_b32 v21, s41, 10 -; GFX10_3-NEXT: v_writelane_b32 v21, s42, 11 -; GFX10_3-NEXT: v_writelane_b32 v21, s43, 12 -; GFX10_3-NEXT: v_writelane_b32 v21, s44, 13 -; GFX10_3-NEXT: v_writelane_b32 v21, s45, 14 -; GFX10_3-NEXT: v_writelane_b32 v21, s46, 15 -; GFX10_3-NEXT: v_writelane_b32 v21, s47, 16 -; GFX10_3-NEXT: v_writelane_b32 v21, s48, 17 -; GFX10_3-NEXT: v_writelane_b32 v21, s49, 18 -; GFX10_3-NEXT: v_writelane_b32 v21, s50, 19 -; GFX10_3-NEXT: v_writelane_b32 v21, s51, 20 -; GFX10_3-NEXT: v_writelane_b32 v21, s52, 21 -; GFX10_3-NEXT: v_writelane_b32 v21, s53, 22 -; GFX10_3-NEXT: v_writelane_b32 v21, s54, 23 -; GFX10_3-NEXT: v_writelane_b32 v21, s55, 24 -; GFX10_3-NEXT: v_writelane_b32 v21, s56, 25 -; GFX10_3-NEXT: v_writelane_b32 v21, s57, 26 -; GFX10_3-NEXT: v_writelane_b32 v21, s58, 27 +; GFX10_3-NEXT: v_writelane_b32 v21, s46, 7 +; GFX10_3-NEXT: v_writelane_b32 v21, s47, 8 +; GFX10_3-NEXT: v_writelane_b32 v21, s48, 9 +; GFX10_3-NEXT: v_writelane_b32 v21, s49, 10 +; GFX10_3-NEXT: v_writelane_b32 v21, s50, 11 +; GFX10_3-NEXT: v_writelane_b32 v21, s51, 12 +; GFX10_3-NEXT: v_writelane_b32 v21, s52, 13 +; GFX10_3-NEXT: v_writelane_b32 v21, s53, 14 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_lshrrev_b32_e64 v22, 5, s32 -; GFX10_3-NEXT: v_writelane_b32 v21, s59, 28 -; GFX10_3-NEXT: s_and_b32 s59, 0, exec_lo ; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 16, v22 ; GFX10_3-NEXT: v_readfirstlane_b32 s59, v22 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s59, v21, 28 -; GFX10_3-NEXT: v_readlane_b32 s58, v21, 27 -; GFX10_3-NEXT: v_readlane_b32 s57, v21, 26 -; GFX10_3-NEXT: v_readlane_b32 s56, v21, 25 -; GFX10_3-NEXT: v_readlane_b32 s55, v21, 24 -; GFX10_3-NEXT: v_readlane_b32 s54, v21, 23 -; GFX10_3-NEXT: v_readlane_b32 s53, v21, 22 -; GFX10_3-NEXT: v_readlane_b32 s52, v21, 21 -; GFX10_3-NEXT: v_readlane_b32 s51, v21, 20 -; GFX10_3-NEXT: v_readlane_b32 s50, v21, 19 -; GFX10_3-NEXT: v_readlane_b32 s49, v21, 18 -; GFX10_3-NEXT: v_readlane_b32 s48, v21, 17 -; GFX10_3-NEXT: v_readlane_b32 s47, v21, 16 -; GFX10_3-NEXT: v_readlane_b32 s46, v21, 15 -; GFX10_3-NEXT: v_readlane_b32 s45, v21, 14 -; GFX10_3-NEXT: v_readlane_b32 s44, v21, 13 -; GFX10_3-NEXT: v_readlane_b32 s43, v21, 12 -; GFX10_3-NEXT: v_readlane_b32 s42, v21, 11 -; GFX10_3-NEXT: v_readlane_b32 s41, v21, 10 -; GFX10_3-NEXT: v_readlane_b32 s40, v21, 9 -; GFX10_3-NEXT: v_readlane_b32 s39, v21, 8 -; GFX10_3-NEXT: v_readlane_b32 s38, v21, 7 +; GFX10_3-NEXT: v_readlane_b32 s53, v21, 14 +; GFX10_3-NEXT: v_readlane_b32 s52, v21, 13 +; GFX10_3-NEXT: v_readlane_b32 s51, v21, 12 +; GFX10_3-NEXT: v_readlane_b32 s50, v21, 11 +; GFX10_3-NEXT: v_readlane_b32 s49, v21, 10 +; GFX10_3-NEXT: v_readlane_b32 s48, v21, 9 +; GFX10_3-NEXT: v_readlane_b32 s47, v21, 8 +; GFX10_3-NEXT: v_readlane_b32 s46, v21, 7 ; GFX10_3-NEXT: v_readlane_b32 s37, v21, 6 ; GFX10_3-NEXT: v_readlane_b32 s36, v21, 5 ; GFX10_3-NEXT: v_readlane_b32 s35, v21, 4 @@ -1310,72 +902,40 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX11-NEXT: scratch_store_b32 off, v21, s1 ; 4-byte Folded Spill ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v21, s30, 0 +; GFX11-NEXT: s_and_b32 s59, 0, exec_lo ; GFX11-NEXT: v_writelane_b32 v21, s31, 1 ; GFX11-NEXT: v_writelane_b32 v21, s33, 2 ; GFX11-NEXT: v_writelane_b32 v21, s34, 3 ; GFX11-NEXT: v_writelane_b32 v21, s35, 4 ; GFX11-NEXT: v_writelane_b32 v21, s36, 5 ; GFX11-NEXT: v_writelane_b32 v21, s37, 6 -; GFX11-NEXT: v_writelane_b32 v21, s38, 7 -; GFX11-NEXT: v_writelane_b32 v21, s39, 8 -; GFX11-NEXT: v_writelane_b32 v21, s40, 9 -; GFX11-NEXT: v_writelane_b32 v21, s41, 10 -; GFX11-NEXT: v_writelane_b32 v21, s42, 11 -; GFX11-NEXT: v_writelane_b32 v21, s43, 12 -; GFX11-NEXT: v_writelane_b32 v21, s44, 13 -; GFX11-NEXT: v_writelane_b32 v21, s45, 14 -; GFX11-NEXT: v_writelane_b32 v21, s46, 15 -; GFX11-NEXT: v_writelane_b32 v21, s47, 16 -; GFX11-NEXT: v_writelane_b32 v21, s48, 17 -; GFX11-NEXT: v_writelane_b32 v21, s49, 18 -; GFX11-NEXT: v_writelane_b32 v21, s50, 19 -; GFX11-NEXT: v_writelane_b32 v21, s51, 20 -; GFX11-NEXT: v_writelane_b32 v21, s52, 21 -; GFX11-NEXT: v_writelane_b32 v21, s53, 22 -; GFX11-NEXT: v_writelane_b32 v21, s54, 23 -; GFX11-NEXT: v_writelane_b32 v21, s55, 24 -; GFX11-NEXT: v_writelane_b32 v21, s56, 25 -; GFX11-NEXT: v_writelane_b32 v21, s57, 26 -; GFX11-NEXT: v_writelane_b32 v21, s58, 27 +; GFX11-NEXT: v_writelane_b32 v21, s46, 7 +; GFX11-NEXT: v_writelane_b32 v21, s47, 8 +; GFX11-NEXT: v_writelane_b32 v21, s48, 9 +; GFX11-NEXT: v_writelane_b32 v21, s49, 10 +; GFX11-NEXT: v_writelane_b32 v21, s50, 11 +; GFX11-NEXT: v_writelane_b32 v21, s51, 12 +; GFX11-NEXT: v_writelane_b32 v21, s52, 13 +; GFX11-NEXT: v_writelane_b32 v21, s53, 14 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v21, s59, 28 -; GFX11-NEXT: s_and_b32 s59, 0, exec_lo -; GFX11-NEXT: s_addc_u32 s32, s32, 16 +; GFX11-NEXT: s_addc_u32 s60, s32, 16 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_bitcmp1_b32 s32, 0 -; GFX11-NEXT: s_bitset0_b32 s32, 0 -; GFX11-NEXT: s_mov_b32 s59, s32 -; GFX11-NEXT: s_addc_u32 s32, s32, -16 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_bitcmp1_b32 s32, 0 -; GFX11-NEXT: s_bitset0_b32 s32, 0 +; GFX11-NEXT: s_bitcmp1_b32 s60, 0 +; GFX11-NEXT: s_bitset0_b32 s60, 0 +; GFX11-NEXT: s_mov_b32 s59, s60 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s59, v21, 28 -; GFX11-NEXT: v_readlane_b32 s58, v21, 27 -; GFX11-NEXT: v_readlane_b32 s57, v21, 26 -; GFX11-NEXT: v_readlane_b32 s56, v21, 25 -; GFX11-NEXT: v_readlane_b32 s55, v21, 24 -; GFX11-NEXT: v_readlane_b32 s54, v21, 23 -; GFX11-NEXT: v_readlane_b32 s53, v21, 22 -; GFX11-NEXT: v_readlane_b32 s52, v21, 21 -; GFX11-NEXT: v_readlane_b32 s51, v21, 20 -; GFX11-NEXT: v_readlane_b32 s50, v21, 19 -; GFX11-NEXT: v_readlane_b32 s49, v21, 18 -; GFX11-NEXT: v_readlane_b32 s48, v21, 17 -; GFX11-NEXT: v_readlane_b32 s47, v21, 16 -; GFX11-NEXT: v_readlane_b32 s46, v21, 15 -; GFX11-NEXT: v_readlane_b32 s45, v21, 14 -; GFX11-NEXT: v_readlane_b32 s44, v21, 13 -; GFX11-NEXT: v_readlane_b32 s43, v21, 12 -; GFX11-NEXT: v_readlane_b32 s42, v21, 11 -; GFX11-NEXT: v_readlane_b32 s41, v21, 10 -; GFX11-NEXT: v_readlane_b32 s40, v21, 9 -; GFX11-NEXT: v_readlane_b32 s39, v21, 8 -; GFX11-NEXT: v_readlane_b32 s38, v21, 7 +; GFX11-NEXT: v_readlane_b32 s53, v21, 14 +; GFX11-NEXT: v_readlane_b32 s52, v21, 13 +; GFX11-NEXT: v_readlane_b32 s51, v21, 12 +; GFX11-NEXT: v_readlane_b32 s50, v21, 11 +; GFX11-NEXT: v_readlane_b32 s49, v21, 10 +; GFX11-NEXT: v_readlane_b32 s48, v21, 9 +; GFX11-NEXT: v_readlane_b32 s47, v21, 8 +; GFX11-NEXT: v_readlane_b32 s46, v21, 7 ; GFX11-NEXT: v_readlane_b32 s37, v21, 6 ; GFX11-NEXT: v_readlane_b32 s36, v21, 5 ; GFX11-NEXT: v_readlane_b32 s35, v21, 4 @@ -1402,65 +962,37 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: v_writelane_b32 v21, s30, 0 +; GFX12-NEXT: s_and_b32 s59, 0, exec_lo ; GFX12-NEXT: v_writelane_b32 v21, s31, 1 ; GFX12-NEXT: v_writelane_b32 v21, s33, 2 ; GFX12-NEXT: v_writelane_b32 v21, s34, 3 ; GFX12-NEXT: v_writelane_b32 v21, s35, 4 ; GFX12-NEXT: v_writelane_b32 v21, s36, 5 ; GFX12-NEXT: v_writelane_b32 v21, s37, 6 -; GFX12-NEXT: v_writelane_b32 v21, s38, 7 -; GFX12-NEXT: v_writelane_b32 v21, s39, 8 -; GFX12-NEXT: v_writelane_b32 v21, s40, 9 -; GFX12-NEXT: v_writelane_b32 v21, s41, 10 -; GFX12-NEXT: v_writelane_b32 v21, s42, 11 -; GFX12-NEXT: v_writelane_b32 v21, s43, 12 -; GFX12-NEXT: v_writelane_b32 v21, s44, 13 -; GFX12-NEXT: v_writelane_b32 v21, s45, 14 -; GFX12-NEXT: v_writelane_b32 v21, s46, 15 -; GFX12-NEXT: v_writelane_b32 v21, s47, 16 -; GFX12-NEXT: v_writelane_b32 v21, s48, 17 -; GFX12-NEXT: v_writelane_b32 v21, s49, 18 -; GFX12-NEXT: v_writelane_b32 v21, s50, 19 -; GFX12-NEXT: v_writelane_b32 v21, s51, 20 -; GFX12-NEXT: v_writelane_b32 v21, s52, 21 -; GFX12-NEXT: v_writelane_b32 v21, s53, 22 -; GFX12-NEXT: v_writelane_b32 v21, s54, 23 -; GFX12-NEXT: v_writelane_b32 v21, s55, 24 -; GFX12-NEXT: v_writelane_b32 v21, s56, 25 -; GFX12-NEXT: v_writelane_b32 v21, s57, 26 -; GFX12-NEXT: v_writelane_b32 v21, s58, 27 +; GFX12-NEXT: v_writelane_b32 v21, s46, 7 +; GFX12-NEXT: v_writelane_b32 v21, s47, 8 +; GFX12-NEXT: v_writelane_b32 v21, s48, 9 +; GFX12-NEXT: v_writelane_b32 v21, s49, 10 +; GFX12-NEXT: v_writelane_b32 v21, s50, 11 +; GFX12-NEXT: v_writelane_b32 v21, s51, 12 +; GFX12-NEXT: v_writelane_b32 v21, s52, 13 +; GFX12-NEXT: v_writelane_b32 v21, s53, 14 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_writelane_b32 v21, s59, 28 -; GFX12-NEXT: s_and_b32 s59, 0, exec_lo ; GFX12-NEXT: s_mov_b32 s59, s32 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_readlane_b32 s59, v21, 28 -; GFX12-NEXT: v_readlane_b32 s58, v21, 27 -; GFX12-NEXT: v_readlane_b32 s57, v21, 26 -; GFX12-NEXT: v_readlane_b32 s56, v21, 25 -; GFX12-NEXT: v_readlane_b32 s55, v21, 24 -; GFX12-NEXT: v_readlane_b32 s54, v21, 23 -; GFX12-NEXT: v_readlane_b32 s53, v21, 22 -; GFX12-NEXT: v_readlane_b32 s52, v21, 21 -; GFX12-NEXT: v_readlane_b32 s51, v21, 20 -; GFX12-NEXT: v_readlane_b32 s50, v21, 19 -; GFX12-NEXT: v_readlane_b32 s49, v21, 18 -; GFX12-NEXT: v_readlane_b32 s48, v21, 17 -; GFX12-NEXT: v_readlane_b32 s47, v21, 16 -; GFX12-NEXT: v_readlane_b32 s46, v21, 15 -; GFX12-NEXT: v_readlane_b32 s45, v21, 14 -; GFX12-NEXT: v_readlane_b32 s44, v21, 13 -; GFX12-NEXT: v_readlane_b32 s43, v21, 12 -; GFX12-NEXT: v_readlane_b32 s42, v21, 11 -; GFX12-NEXT: v_readlane_b32 s41, v21, 10 -; GFX12-NEXT: v_readlane_b32 s40, v21, 9 -; GFX12-NEXT: v_readlane_b32 s39, v21, 8 -; GFX12-NEXT: v_readlane_b32 s38, v21, 7 +; GFX12-NEXT: v_readlane_b32 s53, v21, 14 +; GFX12-NEXT: v_readlane_b32 s52, v21, 13 +; GFX12-NEXT: v_readlane_b32 s51, v21, 12 +; GFX12-NEXT: v_readlane_b32 s50, v21, 11 +; GFX12-NEXT: v_readlane_b32 s49, v21, 10 +; GFX12-NEXT: v_readlane_b32 s48, v21, 9 +; GFX12-NEXT: v_readlane_b32 s47, v21, 8 +; GFX12-NEXT: v_readlane_b32 s46, v21, 7 ; GFX12-NEXT: v_readlane_b32 s37, v21, 6 ; GFX12-NEXT: v_readlane_b32 s36, v21, 5 ; GFX12-NEXT: v_readlane_b32 s35, v21, 4 @@ -1523,8 +1055,8 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX7-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[4:5] -; GFX7-NEXT: v_writelane_b32 v23, s28, 28 -; GFX7-NEXT: v_writelane_b32 v23, s29, 29 +; GFX7-NEXT: v_writelane_b32 v23, s28, 15 +; GFX7-NEXT: v_writelane_b32 v23, s29, 16 ; GFX7-NEXT: v_writelane_b32 v23, s30, 0 ; GFX7-NEXT: v_writelane_b32 v23, s31, 1 ; GFX7-NEXT: v_writelane_b32 v23, s33, 2 @@ -1532,34 +1064,21 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: v_writelane_b32 v23, s35, 4 ; GFX7-NEXT: v_writelane_b32 v23, s36, 5 ; GFX7-NEXT: v_writelane_b32 v23, s37, 6 -; GFX7-NEXT: v_writelane_b32 v23, s38, 7 -; GFX7-NEXT: v_writelane_b32 v23, s39, 8 -; GFX7-NEXT: v_writelane_b32 v23, s40, 9 -; GFX7-NEXT: v_writelane_b32 v23, s41, 10 -; GFX7-NEXT: v_writelane_b32 v23, s42, 11 -; GFX7-NEXT: v_writelane_b32 v23, s43, 12 -; GFX7-NEXT: v_writelane_b32 v23, s44, 13 -; GFX7-NEXT: v_writelane_b32 v23, s45, 14 -; GFX7-NEXT: v_writelane_b32 v23, s46, 15 -; GFX7-NEXT: v_writelane_b32 v23, s47, 16 -; GFX7-NEXT: v_writelane_b32 v23, s48, 17 -; GFX7-NEXT: v_writelane_b32 v23, s49, 18 -; GFX7-NEXT: v_writelane_b32 v23, s50, 19 -; GFX7-NEXT: v_writelane_b32 v23, s51, 20 -; GFX7-NEXT: v_writelane_b32 v23, s52, 21 -; GFX7-NEXT: v_writelane_b32 v23, s53, 22 -; GFX7-NEXT: v_writelane_b32 v23, s54, 23 -; GFX7-NEXT: v_writelane_b32 v23, s55, 24 +; GFX7-NEXT: v_writelane_b32 v23, s46, 7 +; GFX7-NEXT: v_writelane_b32 v23, s47, 8 +; GFX7-NEXT: v_writelane_b32 v23, s48, 9 +; GFX7-NEXT: v_writelane_b32 v23, s49, 10 +; GFX7-NEXT: v_writelane_b32 v23, s50, 11 ; GFX7-NEXT: s_lshr_b32 s5, s32, 6 -; GFX7-NEXT: v_writelane_b32 v23, s56, 25 +; GFX7-NEXT: v_writelane_b32 v23, s51, 12 ; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 ; GFX7-NEXT: s_add_i32 s4, s5, 0x4240 ; GFX7-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane -; GFX7-NEXT: v_writelane_b32 v23, s57, 26 +; GFX7-NEXT: v_writelane_b32 v23, s52, 13 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0 ; GFX7-NEXT: v_writelane_b32 v22, s4, 0 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec -; GFX7-NEXT: v_writelane_b32 v23, s59, 27 +; GFX7-NEXT: v_writelane_b32 v23, s53, 14 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use alloca0 v0 ; GFX7-NEXT: ;;#ASMEND @@ -1570,27 +1089,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX7-NEXT: ;;#ASMEND -; GFX7-NEXT: v_readlane_b32 s59, v23, 27 -; GFX7-NEXT: v_readlane_b32 s57, v23, 26 -; GFX7-NEXT: v_readlane_b32 s56, v23, 25 -; GFX7-NEXT: v_readlane_b32 s55, v23, 24 -; GFX7-NEXT: v_readlane_b32 s54, v23, 23 -; GFX7-NEXT: v_readlane_b32 s53, v23, 22 -; GFX7-NEXT: v_readlane_b32 s52, v23, 21 -; GFX7-NEXT: v_readlane_b32 s51, v23, 20 -; GFX7-NEXT: v_readlane_b32 s50, v23, 19 -; GFX7-NEXT: v_readlane_b32 s49, v23, 18 -; GFX7-NEXT: v_readlane_b32 s48, v23, 17 -; GFX7-NEXT: v_readlane_b32 s47, v23, 16 -; GFX7-NEXT: v_readlane_b32 s46, v23, 15 -; GFX7-NEXT: v_readlane_b32 s45, v23, 14 -; GFX7-NEXT: v_readlane_b32 s44, v23, 13 -; GFX7-NEXT: v_readlane_b32 s43, v23, 12 -; GFX7-NEXT: v_readlane_b32 s42, v23, 11 -; GFX7-NEXT: v_readlane_b32 s41, v23, 10 -; GFX7-NEXT: v_readlane_b32 s40, v23, 9 -; GFX7-NEXT: v_readlane_b32 s39, v23, 8 -; GFX7-NEXT: v_readlane_b32 s38, v23, 7 +; GFX7-NEXT: v_readlane_b32 s53, v23, 14 +; GFX7-NEXT: v_readlane_b32 s52, v23, 13 +; GFX7-NEXT: v_readlane_b32 s51, v23, 12 +; GFX7-NEXT: v_readlane_b32 s50, v23, 11 +; GFX7-NEXT: v_readlane_b32 s49, v23, 10 +; GFX7-NEXT: v_readlane_b32 s48, v23, 9 +; GFX7-NEXT: v_readlane_b32 s47, v23, 8 +; GFX7-NEXT: v_readlane_b32 s46, v23, 7 ; GFX7-NEXT: v_readlane_b32 s37, v23, 6 ; GFX7-NEXT: v_readlane_b32 s36, v23, 5 ; GFX7-NEXT: v_readlane_b32 s35, v23, 4 @@ -1598,8 +1104,8 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: v_readlane_b32 s33, v23, 2 ; GFX7-NEXT: v_readlane_b32 s31, v23, 1 ; GFX7-NEXT: v_readlane_b32 s30, v23, 0 -; GFX7-NEXT: v_readlane_b32 s28, v23, 28 -; GFX7-NEXT: v_readlane_b32 s29, v23, 29 +; GFX7-NEXT: v_readlane_b32 s28, v23, 15 +; GFX7-NEXT: v_readlane_b32 s29, v23, 16 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload @@ -1623,32 +1129,19 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX8-NEXT: v_writelane_b32 v22, s35, 4 ; GFX8-NEXT: v_writelane_b32 v22, s36, 5 ; GFX8-NEXT: v_writelane_b32 v22, s37, 6 -; GFX8-NEXT: v_writelane_b32 v22, s38, 7 -; GFX8-NEXT: v_writelane_b32 v22, s39, 8 -; GFX8-NEXT: v_writelane_b32 v22, s40, 9 -; GFX8-NEXT: v_writelane_b32 v22, s41, 10 -; GFX8-NEXT: v_writelane_b32 v22, s42, 11 -; GFX8-NEXT: v_writelane_b32 v22, s43, 12 -; GFX8-NEXT: v_writelane_b32 v22, s44, 13 -; GFX8-NEXT: v_writelane_b32 v22, s45, 14 -; GFX8-NEXT: v_writelane_b32 v22, s46, 15 -; GFX8-NEXT: v_writelane_b32 v22, s47, 16 -; GFX8-NEXT: v_writelane_b32 v22, s48, 17 -; GFX8-NEXT: v_writelane_b32 v22, s49, 18 -; GFX8-NEXT: v_writelane_b32 v22, s50, 19 -; GFX8-NEXT: v_writelane_b32 v22, s51, 20 -; GFX8-NEXT: v_writelane_b32 v22, s52, 21 -; GFX8-NEXT: v_writelane_b32 v22, s53, 22 -; GFX8-NEXT: v_writelane_b32 v22, s54, 23 -; GFX8-NEXT: v_writelane_b32 v22, s55, 24 -; GFX8-NEXT: v_writelane_b32 v22, s56, 25 -; GFX8-NEXT: v_writelane_b32 v22, s57, 26 +; GFX8-NEXT: v_writelane_b32 v22, s46, 7 +; GFX8-NEXT: v_writelane_b32 v22, s47, 8 +; GFX8-NEXT: v_writelane_b32 v22, s48, 9 +; GFX8-NEXT: v_writelane_b32 v22, s49, 10 +; GFX8-NEXT: v_writelane_b32 v22, s50, 11 ; GFX8-NEXT: s_lshr_b32 s4, s32, 6 -; GFX8-NEXT: v_writelane_b32 v22, s59, 27 +; GFX8-NEXT: v_writelane_b32 v22, s51, 12 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: s_add_i32 s59, s4, 0x4240 +; GFX8-NEXT: v_writelane_b32 v22, s52, 13 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: v_writelane_b32 v22, s53, 14 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND @@ -1658,27 +1151,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s59, v22, 27 -; GFX8-NEXT: v_readlane_b32 s57, v22, 26 -; GFX8-NEXT: v_readlane_b32 s56, v22, 25 -; GFX8-NEXT: v_readlane_b32 s55, v22, 24 -; GFX8-NEXT: v_readlane_b32 s54, v22, 23 -; GFX8-NEXT: v_readlane_b32 s53, v22, 22 -; GFX8-NEXT: v_readlane_b32 s52, v22, 21 -; GFX8-NEXT: v_readlane_b32 s51, v22, 20 -; GFX8-NEXT: v_readlane_b32 s50, v22, 19 -; GFX8-NEXT: v_readlane_b32 s49, v22, 18 -; GFX8-NEXT: v_readlane_b32 s48, v22, 17 -; GFX8-NEXT: v_readlane_b32 s47, v22, 16 -; GFX8-NEXT: v_readlane_b32 s46, v22, 15 -; GFX8-NEXT: v_readlane_b32 s45, v22, 14 -; GFX8-NEXT: v_readlane_b32 s44, v22, 13 -; GFX8-NEXT: v_readlane_b32 s43, v22, 12 -; GFX8-NEXT: v_readlane_b32 s42, v22, 11 -; GFX8-NEXT: v_readlane_b32 s41, v22, 10 -; GFX8-NEXT: v_readlane_b32 s40, v22, 9 -; GFX8-NEXT: v_readlane_b32 s39, v22, 8 -; GFX8-NEXT: v_readlane_b32 s38, v22, 7 +; GFX8-NEXT: v_readlane_b32 s53, v22, 14 +; GFX8-NEXT: v_readlane_b32 s52, v22, 13 +; GFX8-NEXT: v_readlane_b32 s51, v22, 12 +; GFX8-NEXT: v_readlane_b32 s50, v22, 11 +; GFX8-NEXT: v_readlane_b32 s49, v22, 10 +; GFX8-NEXT: v_readlane_b32 s48, v22, 9 +; GFX8-NEXT: v_readlane_b32 s47, v22, 8 +; GFX8-NEXT: v_readlane_b32 s46, v22, 7 ; GFX8-NEXT: v_readlane_b32 s37, v22, 6 ; GFX8-NEXT: v_readlane_b32 s36, v22, 5 ; GFX8-NEXT: v_readlane_b32 s35, v22, 4 @@ -1707,32 +1187,19 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX900-NEXT: v_writelane_b32 v22, s35, 4 ; GFX900-NEXT: v_writelane_b32 v22, s36, 5 ; GFX900-NEXT: v_writelane_b32 v22, s37, 6 -; GFX900-NEXT: v_writelane_b32 v22, s38, 7 -; GFX900-NEXT: v_writelane_b32 v22, s39, 8 -; GFX900-NEXT: v_writelane_b32 v22, s40, 9 -; GFX900-NEXT: v_writelane_b32 v22, s41, 10 -; GFX900-NEXT: v_writelane_b32 v22, s42, 11 -; GFX900-NEXT: v_writelane_b32 v22, s43, 12 -; GFX900-NEXT: v_writelane_b32 v22, s44, 13 -; GFX900-NEXT: v_writelane_b32 v22, s45, 14 -; GFX900-NEXT: v_writelane_b32 v22, s46, 15 -; GFX900-NEXT: v_writelane_b32 v22, s47, 16 -; GFX900-NEXT: v_writelane_b32 v22, s48, 17 -; GFX900-NEXT: v_writelane_b32 v22, s49, 18 -; GFX900-NEXT: v_writelane_b32 v22, s50, 19 -; GFX900-NEXT: v_writelane_b32 v22, s51, 20 -; GFX900-NEXT: v_writelane_b32 v22, s52, 21 -; GFX900-NEXT: v_writelane_b32 v22, s53, 22 -; GFX900-NEXT: v_writelane_b32 v22, s54, 23 -; GFX900-NEXT: v_writelane_b32 v22, s55, 24 -; GFX900-NEXT: v_writelane_b32 v22, s56, 25 -; GFX900-NEXT: v_writelane_b32 v22, s57, 26 +; GFX900-NEXT: v_writelane_b32 v22, s46, 7 +; GFX900-NEXT: v_writelane_b32 v22, s47, 8 +; GFX900-NEXT: v_writelane_b32 v22, s48, 9 +; GFX900-NEXT: v_writelane_b32 v22, s49, 10 +; GFX900-NEXT: v_writelane_b32 v22, s50, 11 ; GFX900-NEXT: s_lshr_b32 s4, s32, 6 -; GFX900-NEXT: v_writelane_b32 v22, s59, 27 +; GFX900-NEXT: v_writelane_b32 v22, s51, 12 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: s_add_i32 s59, s4, 0x4240 +; GFX900-NEXT: v_writelane_b32 v22, s52, 13 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: v_writelane_b32 v22, s53, 14 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND @@ -1742,27 +1209,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s59, v22, 27 -; GFX900-NEXT: v_readlane_b32 s57, v22, 26 -; GFX900-NEXT: v_readlane_b32 s56, v22, 25 -; GFX900-NEXT: v_readlane_b32 s55, v22, 24 -; GFX900-NEXT: v_readlane_b32 s54, v22, 23 -; GFX900-NEXT: v_readlane_b32 s53, v22, 22 -; GFX900-NEXT: v_readlane_b32 s52, v22, 21 -; GFX900-NEXT: v_readlane_b32 s51, v22, 20 -; GFX900-NEXT: v_readlane_b32 s50, v22, 19 -; GFX900-NEXT: v_readlane_b32 s49, v22, 18 -; GFX900-NEXT: v_readlane_b32 s48, v22, 17 -; GFX900-NEXT: v_readlane_b32 s47, v22, 16 -; GFX900-NEXT: v_readlane_b32 s46, v22, 15 -; GFX900-NEXT: v_readlane_b32 s45, v22, 14 -; GFX900-NEXT: v_readlane_b32 s44, v22, 13 -; GFX900-NEXT: v_readlane_b32 s43, v22, 12 -; GFX900-NEXT: v_readlane_b32 s42, v22, 11 -; GFX900-NEXT: v_readlane_b32 s41, v22, 10 -; GFX900-NEXT: v_readlane_b32 s40, v22, 9 -; GFX900-NEXT: v_readlane_b32 s39, v22, 8 -; GFX900-NEXT: v_readlane_b32 s38, v22, 7 +; GFX900-NEXT: v_readlane_b32 s53, v22, 14 +; GFX900-NEXT: v_readlane_b32 s52, v22, 13 +; GFX900-NEXT: v_readlane_b32 s51, v22, 12 +; GFX900-NEXT: v_readlane_b32 s50, v22, 11 +; GFX900-NEXT: v_readlane_b32 s49, v22, 10 +; GFX900-NEXT: v_readlane_b32 s48, v22, 9 +; GFX900-NEXT: v_readlane_b32 s47, v22, 8 +; GFX900-NEXT: v_readlane_b32 s46, v22, 7 ; GFX900-NEXT: v_readlane_b32 s37, v22, 6 ; GFX900-NEXT: v_readlane_b32 s36, v22, 5 ; GFX900-NEXT: v_readlane_b32 s35, v22, 4 @@ -1791,30 +1245,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX942-NEXT: v_writelane_b32 v22, s35, 4 ; GFX942-NEXT: v_writelane_b32 v22, s36, 5 ; GFX942-NEXT: v_writelane_b32 v22, s37, 6 -; GFX942-NEXT: v_writelane_b32 v22, s38, 7 -; GFX942-NEXT: v_writelane_b32 v22, s39, 8 -; GFX942-NEXT: v_writelane_b32 v22, s40, 9 -; GFX942-NEXT: v_writelane_b32 v22, s41, 10 -; GFX942-NEXT: v_writelane_b32 v22, s42, 11 -; GFX942-NEXT: v_writelane_b32 v22, s43, 12 -; GFX942-NEXT: v_writelane_b32 v22, s44, 13 -; GFX942-NEXT: v_writelane_b32 v22, s45, 14 -; GFX942-NEXT: v_writelane_b32 v22, s46, 15 -; GFX942-NEXT: v_writelane_b32 v22, s47, 16 -; GFX942-NEXT: v_writelane_b32 v22, s48, 17 -; GFX942-NEXT: v_writelane_b32 v22, s49, 18 -; GFX942-NEXT: v_writelane_b32 v22, s50, 19 -; GFX942-NEXT: v_writelane_b32 v22, s51, 20 -; GFX942-NEXT: v_writelane_b32 v22, s52, 21 -; GFX942-NEXT: v_writelane_b32 v22, s53, 22 -; GFX942-NEXT: v_writelane_b32 v22, s54, 23 -; GFX942-NEXT: v_writelane_b32 v22, s55, 24 -; GFX942-NEXT: v_writelane_b32 v22, s56, 25 -; GFX942-NEXT: v_writelane_b32 v22, s57, 26 +; GFX942-NEXT: v_writelane_b32 v22, s46, 7 +; GFX942-NEXT: v_writelane_b32 v22, s47, 8 +; GFX942-NEXT: v_writelane_b32 v22, s48, 9 +; GFX942-NEXT: v_writelane_b32 v22, s49, 10 +; GFX942-NEXT: v_writelane_b32 v22, s50, 11 +; GFX942-NEXT: v_writelane_b32 v22, s51, 12 ; GFX942-NEXT: s_add_i32 s0, s32, 64 -; GFX942-NEXT: v_writelane_b32 v22, s59, 27 +; GFX942-NEXT: v_writelane_b32 v22, s52, 13 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 -; GFX942-NEXT: v_writelane_b32 v22, s60, 28 +; GFX942-NEXT: v_writelane_b32 v22, s53, 14 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND @@ -1822,34 +1262,18 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_add_i32 s59, s32, 0x4240 -; GFX942-NEXT: v_writelane_b32 v22, s61, 29 ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: v_readlane_b32 s61, v22, 29 -; GFX942-NEXT: v_readlane_b32 s60, v22, 28 -; GFX942-NEXT: v_readlane_b32 s59, v22, 27 -; GFX942-NEXT: v_readlane_b32 s57, v22, 26 -; GFX942-NEXT: v_readlane_b32 s56, v22, 25 -; GFX942-NEXT: v_readlane_b32 s55, v22, 24 -; GFX942-NEXT: v_readlane_b32 s54, v22, 23 -; GFX942-NEXT: v_readlane_b32 s53, v22, 22 -; GFX942-NEXT: v_readlane_b32 s52, v22, 21 -; GFX942-NEXT: v_readlane_b32 s51, v22, 20 -; GFX942-NEXT: v_readlane_b32 s50, v22, 19 -; GFX942-NEXT: v_readlane_b32 s49, v22, 18 -; GFX942-NEXT: v_readlane_b32 s48, v22, 17 -; GFX942-NEXT: v_readlane_b32 s47, v22, 16 -; GFX942-NEXT: v_readlane_b32 s46, v22, 15 -; GFX942-NEXT: v_readlane_b32 s45, v22, 14 -; GFX942-NEXT: v_readlane_b32 s44, v22, 13 -; GFX942-NEXT: v_readlane_b32 s43, v22, 12 -; GFX942-NEXT: v_readlane_b32 s42, v22, 11 -; GFX942-NEXT: v_readlane_b32 s41, v22, 10 -; GFX942-NEXT: v_readlane_b32 s40, v22, 9 -; GFX942-NEXT: v_readlane_b32 s39, v22, 8 -; GFX942-NEXT: v_readlane_b32 s38, v22, 7 +; GFX942-NEXT: v_readlane_b32 s53, v22, 14 +; GFX942-NEXT: v_readlane_b32 s52, v22, 13 +; GFX942-NEXT: v_readlane_b32 s51, v22, 12 +; GFX942-NEXT: v_readlane_b32 s50, v22, 11 +; GFX942-NEXT: v_readlane_b32 s49, v22, 10 +; GFX942-NEXT: v_readlane_b32 s48, v22, 9 +; GFX942-NEXT: v_readlane_b32 s47, v22, 8 +; GFX942-NEXT: v_readlane_b32 s46, v22, 7 ; GFX942-NEXT: v_readlane_b32 s37, v22, 6 ; GFX942-NEXT: v_readlane_b32 s36, v22, 5 ; GFX942-NEXT: v_readlane_b32 s35, v22, 4 @@ -1875,8 +1299,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_1-NEXT: v_writelane_b32 v22, s30, 0 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5 +; GFX10_1-NEXT: s_add_i32 s59, s4, 0x4240 ; GFX10_1-NEXT: v_writelane_b32 v22, s31, 1 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND @@ -1885,56 +1311,28 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_1-NEXT: v_writelane_b32 v22, s35, 4 ; GFX10_1-NEXT: v_writelane_b32 v22, s36, 5 ; GFX10_1-NEXT: v_writelane_b32 v22, s37, 6 -; GFX10_1-NEXT: v_writelane_b32 v22, s38, 7 -; GFX10_1-NEXT: v_writelane_b32 v22, s39, 8 -; GFX10_1-NEXT: v_writelane_b32 v22, s40, 9 -; GFX10_1-NEXT: v_writelane_b32 v22, s41, 10 -; GFX10_1-NEXT: v_writelane_b32 v22, s42, 11 -; GFX10_1-NEXT: v_writelane_b32 v22, s43, 12 -; GFX10_1-NEXT: v_writelane_b32 v22, s44, 13 -; GFX10_1-NEXT: v_writelane_b32 v22, s45, 14 -; GFX10_1-NEXT: v_writelane_b32 v22, s46, 15 -; GFX10_1-NEXT: v_writelane_b32 v22, s47, 16 -; GFX10_1-NEXT: v_writelane_b32 v22, s48, 17 -; GFX10_1-NEXT: v_writelane_b32 v22, s49, 18 -; GFX10_1-NEXT: v_writelane_b32 v22, s50, 19 -; GFX10_1-NEXT: v_writelane_b32 v22, s51, 20 -; GFX10_1-NEXT: v_writelane_b32 v22, s52, 21 -; GFX10_1-NEXT: v_writelane_b32 v22, s53, 22 -; GFX10_1-NEXT: v_writelane_b32 v22, s54, 23 -; GFX10_1-NEXT: v_writelane_b32 v22, s55, 24 -; GFX10_1-NEXT: v_writelane_b32 v22, s56, 25 -; GFX10_1-NEXT: v_writelane_b32 v22, s57, 26 -; GFX10_1-NEXT: v_writelane_b32 v22, s59, 27 -; GFX10_1-NEXT: s_add_i32 s59, s4, 0x4240 -; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_1-NEXT: v_writelane_b32 v22, s46, 7 +; GFX10_1-NEXT: v_writelane_b32 v22, s47, 8 +; GFX10_1-NEXT: v_writelane_b32 v22, s48, 9 +; GFX10_1-NEXT: v_writelane_b32 v22, s49, 10 +; GFX10_1-NEXT: v_writelane_b32 v22, s50, 11 +; GFX10_1-NEXT: v_writelane_b32 v22, s51, 12 +; GFX10_1-NEXT: v_writelane_b32 v22, s52, 13 +; GFX10_1-NEXT: v_writelane_b32 v22, s53, 14 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX10_1-NEXT: ;;#ASMEND -; GFX10_1-NEXT: v_readlane_b32 s59, v22, 27 -; GFX10_1-NEXT: v_readlane_b32 s57, v22, 26 -; GFX10_1-NEXT: v_readlane_b32 s56, v22, 25 -; GFX10_1-NEXT: v_readlane_b32 s55, v22, 24 -; GFX10_1-NEXT: v_readlane_b32 s54, v22, 23 -; GFX10_1-NEXT: v_readlane_b32 s53, v22, 22 -; GFX10_1-NEXT: v_readlane_b32 s52, v22, 21 -; GFX10_1-NEXT: v_readlane_b32 s51, v22, 20 -; GFX10_1-NEXT: v_readlane_b32 s50, v22, 19 -; GFX10_1-NEXT: v_readlane_b32 s49, v22, 18 -; GFX10_1-NEXT: v_readlane_b32 s48, v22, 17 -; GFX10_1-NEXT: v_readlane_b32 s47, v22, 16 -; GFX10_1-NEXT: v_readlane_b32 s46, v22, 15 -; GFX10_1-NEXT: v_readlane_b32 s45, v22, 14 -; GFX10_1-NEXT: v_readlane_b32 s44, v22, 13 -; GFX10_1-NEXT: v_readlane_b32 s43, v22, 12 -; GFX10_1-NEXT: v_readlane_b32 s42, v22, 11 -; GFX10_1-NEXT: v_readlane_b32 s41, v22, 10 -; GFX10_1-NEXT: v_readlane_b32 s40, v22, 9 -; GFX10_1-NEXT: v_readlane_b32 s39, v22, 8 -; GFX10_1-NEXT: v_readlane_b32 s38, v22, 7 +; GFX10_1-NEXT: v_readlane_b32 s53, v22, 14 +; GFX10_1-NEXT: v_readlane_b32 s52, v22, 13 +; GFX10_1-NEXT: v_readlane_b32 s51, v22, 12 +; GFX10_1-NEXT: v_readlane_b32 s50, v22, 11 +; GFX10_1-NEXT: v_readlane_b32 s49, v22, 10 +; GFX10_1-NEXT: v_readlane_b32 s48, v22, 9 +; GFX10_1-NEXT: v_readlane_b32 s47, v22, 8 +; GFX10_1-NEXT: v_readlane_b32 s46, v22, 7 ; GFX10_1-NEXT: v_readlane_b32 s37, v22, 6 ; GFX10_1-NEXT: v_readlane_b32 s36, v22, 5 ; GFX10_1-NEXT: v_readlane_b32 s35, v22, 4 @@ -1960,8 +1358,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_3-NEXT: v_writelane_b32 v22, s30, 0 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5 +; GFX10_3-NEXT: s_add_i32 s59, s4, 0x4240 ; GFX10_3-NEXT: v_writelane_b32 v22, s31, 1 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 +; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND @@ -1970,56 +1370,28 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_3-NEXT: v_writelane_b32 v22, s35, 4 ; GFX10_3-NEXT: v_writelane_b32 v22, s36, 5 ; GFX10_3-NEXT: v_writelane_b32 v22, s37, 6 -; GFX10_3-NEXT: v_writelane_b32 v22, s38, 7 -; GFX10_3-NEXT: v_writelane_b32 v22, s39, 8 -; GFX10_3-NEXT: v_writelane_b32 v22, s40, 9 -; GFX10_3-NEXT: v_writelane_b32 v22, s41, 10 -; GFX10_3-NEXT: v_writelane_b32 v22, s42, 11 -; GFX10_3-NEXT: v_writelane_b32 v22, s43, 12 -; GFX10_3-NEXT: v_writelane_b32 v22, s44, 13 -; GFX10_3-NEXT: v_writelane_b32 v22, s45, 14 -; GFX10_3-NEXT: v_writelane_b32 v22, s46, 15 -; GFX10_3-NEXT: v_writelane_b32 v22, s47, 16 -; GFX10_3-NEXT: v_writelane_b32 v22, s48, 17 -; GFX10_3-NEXT: v_writelane_b32 v22, s49, 18 -; GFX10_3-NEXT: v_writelane_b32 v22, s50, 19 -; GFX10_3-NEXT: v_writelane_b32 v22, s51, 20 -; GFX10_3-NEXT: v_writelane_b32 v22, s52, 21 -; GFX10_3-NEXT: v_writelane_b32 v22, s53, 22 -; GFX10_3-NEXT: v_writelane_b32 v22, s54, 23 -; GFX10_3-NEXT: v_writelane_b32 v22, s55, 24 -; GFX10_3-NEXT: v_writelane_b32 v22, s56, 25 -; GFX10_3-NEXT: v_writelane_b32 v22, s57, 26 -; GFX10_3-NEXT: v_writelane_b32 v22, s59, 27 -; GFX10_3-NEXT: s_add_i32 s59, s4, 0x4240 -; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo +; GFX10_3-NEXT: v_writelane_b32 v22, s46, 7 +; GFX10_3-NEXT: v_writelane_b32 v22, s47, 8 +; GFX10_3-NEXT: v_writelane_b32 v22, s48, 9 +; GFX10_3-NEXT: v_writelane_b32 v22, s49, 10 +; GFX10_3-NEXT: v_writelane_b32 v22, s50, 11 +; GFX10_3-NEXT: v_writelane_b32 v22, s51, 12 +; GFX10_3-NEXT: v_writelane_b32 v22, s52, 13 +; GFX10_3-NEXT: v_writelane_b32 v22, s53, 14 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX10_3-NEXT: ;;#ASMEND -; GFX10_3-NEXT: v_readlane_b32 s59, v22, 27 -; GFX10_3-NEXT: v_readlane_b32 s57, v22, 26 -; GFX10_3-NEXT: v_readlane_b32 s56, v22, 25 -; GFX10_3-NEXT: v_readlane_b32 s55, v22, 24 -; GFX10_3-NEXT: v_readlane_b32 s54, v22, 23 -; GFX10_3-NEXT: v_readlane_b32 s53, v22, 22 -; GFX10_3-NEXT: v_readlane_b32 s52, v22, 21 -; GFX10_3-NEXT: v_readlane_b32 s51, v22, 20 -; GFX10_3-NEXT: v_readlane_b32 s50, v22, 19 -; GFX10_3-NEXT: v_readlane_b32 s49, v22, 18 -; GFX10_3-NEXT: v_readlane_b32 s48, v22, 17 -; GFX10_3-NEXT: v_readlane_b32 s47, v22, 16 -; GFX10_3-NEXT: v_readlane_b32 s46, v22, 15 -; GFX10_3-NEXT: v_readlane_b32 s45, v22, 14 -; GFX10_3-NEXT: v_readlane_b32 s44, v22, 13 -; GFX10_3-NEXT: v_readlane_b32 s43, v22, 12 -; GFX10_3-NEXT: v_readlane_b32 s42, v22, 11 -; GFX10_3-NEXT: v_readlane_b32 s41, v22, 10 -; GFX10_3-NEXT: v_readlane_b32 s40, v22, 9 -; GFX10_3-NEXT: v_readlane_b32 s39, v22, 8 -; GFX10_3-NEXT: v_readlane_b32 s38, v22, 7 +; GFX10_3-NEXT: v_readlane_b32 s53, v22, 14 +; GFX10_3-NEXT: v_readlane_b32 s52, v22, 13 +; GFX10_3-NEXT: v_readlane_b32 s51, v22, 12 +; GFX10_3-NEXT: v_readlane_b32 s50, v22, 11 +; GFX10_3-NEXT: v_readlane_b32 s49, v22, 10 +; GFX10_3-NEXT: v_readlane_b32 s48, v22, 9 +; GFX10_3-NEXT: v_readlane_b32 s47, v22, 8 +; GFX10_3-NEXT: v_readlane_b32 s46, v22, 7 ; GFX10_3-NEXT: v_readlane_b32 s37, v22, 6 ; GFX10_3-NEXT: v_readlane_b32 s36, v22, 5 ; GFX10_3-NEXT: v_readlane_b32 s35, v22, 4 @@ -2043,67 +1415,41 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v22, s30, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_add_i32 s59, s32, 0x4240 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_and_b32 s0, 0, exec_lo +; GFX11-NEXT: v_writelane_b32 v22, s31, 1 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use alloca0 v0 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_writelane_b32 v22, s31, 1 ; GFX11-NEXT: v_writelane_b32 v22, s33, 2 ; GFX11-NEXT: v_writelane_b32 v22, s34, 3 ; GFX11-NEXT: v_writelane_b32 v22, s35, 4 ; GFX11-NEXT: v_writelane_b32 v22, s36, 5 ; GFX11-NEXT: v_writelane_b32 v22, s37, 6 -; GFX11-NEXT: v_writelane_b32 v22, s38, 7 -; GFX11-NEXT: v_writelane_b32 v22, s39, 8 -; GFX11-NEXT: v_writelane_b32 v22, s40, 9 -; GFX11-NEXT: v_writelane_b32 v22, s41, 10 -; GFX11-NEXT: v_writelane_b32 v22, s42, 11 -; GFX11-NEXT: v_writelane_b32 v22, s43, 12 -; GFX11-NEXT: v_writelane_b32 v22, s44, 13 -; GFX11-NEXT: v_writelane_b32 v22, s45, 14 -; GFX11-NEXT: v_writelane_b32 v22, s46, 15 -; GFX11-NEXT: v_writelane_b32 v22, s47, 16 -; GFX11-NEXT: v_writelane_b32 v22, s48, 17 -; GFX11-NEXT: v_writelane_b32 v22, s49, 18 -; GFX11-NEXT: v_writelane_b32 v22, s50, 19 -; GFX11-NEXT: v_writelane_b32 v22, s51, 20 -; GFX11-NEXT: v_writelane_b32 v22, s52, 21 -; GFX11-NEXT: v_writelane_b32 v22, s53, 22 -; GFX11-NEXT: v_writelane_b32 v22, s54, 23 -; GFX11-NEXT: v_writelane_b32 v22, s55, 24 -; GFX11-NEXT: v_writelane_b32 v22, s56, 25 -; GFX11-NEXT: v_writelane_b32 v22, s57, 26 -; GFX11-NEXT: v_writelane_b32 v22, s59, 27 -; GFX11-NEXT: s_add_i32 s59, s32, 0x4240 -; GFX11-NEXT: s_and_b32 s0, 0, exec_lo +; GFX11-NEXT: v_writelane_b32 v22, s46, 7 +; GFX11-NEXT: v_writelane_b32 v22, s47, 8 +; GFX11-NEXT: v_writelane_b32 v22, s48, 9 +; GFX11-NEXT: v_writelane_b32 v22, s49, 10 +; GFX11-NEXT: v_writelane_b32 v22, s50, 11 +; GFX11-NEXT: v_writelane_b32 v22, s51, 12 +; GFX11-NEXT: v_writelane_b32 v22, s52, 13 +; GFX11-NEXT: v_writelane_b32 v22, s53, 14 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: v_readlane_b32 s59, v22, 27 -; GFX11-NEXT: v_readlane_b32 s57, v22, 26 -; GFX11-NEXT: v_readlane_b32 s56, v22, 25 -; GFX11-NEXT: v_readlane_b32 s55, v22, 24 -; GFX11-NEXT: v_readlane_b32 s54, v22, 23 -; GFX11-NEXT: v_readlane_b32 s53, v22, 22 -; GFX11-NEXT: v_readlane_b32 s52, v22, 21 -; GFX11-NEXT: v_readlane_b32 s51, v22, 20 -; GFX11-NEXT: v_readlane_b32 s50, v22, 19 -; GFX11-NEXT: v_readlane_b32 s49, v22, 18 -; GFX11-NEXT: v_readlane_b32 s48, v22, 17 -; GFX11-NEXT: v_readlane_b32 s47, v22, 16 -; GFX11-NEXT: v_readlane_b32 s46, v22, 15 -; GFX11-NEXT: v_readlane_b32 s45, v22, 14 -; GFX11-NEXT: v_readlane_b32 s44, v22, 13 -; GFX11-NEXT: v_readlane_b32 s43, v22, 12 -; GFX11-NEXT: v_readlane_b32 s42, v22, 11 -; GFX11-NEXT: v_readlane_b32 s41, v22, 10 -; GFX11-NEXT: v_readlane_b32 s40, v22, 9 -; GFX11-NEXT: v_readlane_b32 s39, v22, 8 -; GFX11-NEXT: v_readlane_b32 s38, v22, 7 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_readlane_b32 s53, v22, 14 +; GFX11-NEXT: v_readlane_b32 s52, v22, 13 +; GFX11-NEXT: v_readlane_b32 s51, v22, 12 +; GFX11-NEXT: v_readlane_b32 s50, v22, 11 +; GFX11-NEXT: v_readlane_b32 s49, v22, 10 +; GFX11-NEXT: v_readlane_b32 s48, v22, 9 +; GFX11-NEXT: v_readlane_b32 s47, v22, 8 +; GFX11-NEXT: v_readlane_b32 s46, v22, 7 ; GFX11-NEXT: v_readlane_b32 s37, v22, 6 ; GFX11-NEXT: v_readlane_b32 s36, v22, 5 ; GFX11-NEXT: v_readlane_b32 s35, v22, 4 @@ -2130,7 +1476,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: v_writelane_b32 v22, s30, 0 +; GFX12-NEXT: s_add_co_i32 s59, s32, 0x4200 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 +; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND @@ -2140,56 +1488,29 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX12-NEXT: v_writelane_b32 v22, s35, 4 ; GFX12-NEXT: v_writelane_b32 v22, s36, 5 ; GFX12-NEXT: v_writelane_b32 v22, s37, 6 -; GFX12-NEXT: v_writelane_b32 v22, s38, 7 -; GFX12-NEXT: v_writelane_b32 v22, s39, 8 -; GFX12-NEXT: v_writelane_b32 v22, s40, 9 -; GFX12-NEXT: v_writelane_b32 v22, s41, 10 -; GFX12-NEXT: v_writelane_b32 v22, s42, 11 -; GFX12-NEXT: v_writelane_b32 v22, s43, 12 -; GFX12-NEXT: v_writelane_b32 v22, s44, 13 -; GFX12-NEXT: v_writelane_b32 v22, s45, 14 -; GFX12-NEXT: v_writelane_b32 v22, s46, 15 -; GFX12-NEXT: v_writelane_b32 v22, s47, 16 -; GFX12-NEXT: v_writelane_b32 v22, s48, 17 -; GFX12-NEXT: v_writelane_b32 v22, s49, 18 -; GFX12-NEXT: v_writelane_b32 v22, s50, 19 -; GFX12-NEXT: v_writelane_b32 v22, s51, 20 -; GFX12-NEXT: v_writelane_b32 v22, s52, 21 -; GFX12-NEXT: v_writelane_b32 v22, s53, 22 -; GFX12-NEXT: v_writelane_b32 v22, s54, 23 -; GFX12-NEXT: v_writelane_b32 v22, s55, 24 -; GFX12-NEXT: v_writelane_b32 v22, s56, 25 -; GFX12-NEXT: v_writelane_b32 v22, s57, 26 -; GFX12-NEXT: v_writelane_b32 v22, s59, 27 -; GFX12-NEXT: s_add_co_i32 s59, s32, 0x4200 -; GFX12-NEXT: s_and_b32 s0, 0, exec_lo +; GFX12-NEXT: v_writelane_b32 v22, s46, 7 +; GFX12-NEXT: v_writelane_b32 v22, s47, 8 +; GFX12-NEXT: v_writelane_b32 v22, s48, 9 +; GFX12-NEXT: v_writelane_b32 v22, s49, 10 +; GFX12-NEXT: v_writelane_b32 v22, s50, 11 +; GFX12-NEXT: v_writelane_b32 v22, s51, 12 +; GFX12-NEXT: v_writelane_b32 v22, s52, 13 +; GFX12-NEXT: v_writelane_b32 v22, s53, 14 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: v_readlane_b32 s59, v22, 27 -; GFX12-NEXT: v_readlane_b32 s57, v22, 26 -; GFX12-NEXT: v_readlane_b32 s56, v22, 25 -; GFX12-NEXT: v_readlane_b32 s55, v22, 24 -; GFX12-NEXT: v_readlane_b32 s54, v22, 23 -; GFX12-NEXT: v_readlane_b32 s53, v22, 22 -; GFX12-NEXT: v_readlane_b32 s52, v22, 21 -; GFX12-NEXT: v_readlane_b32 s51, v22, 20 -; GFX12-NEXT: v_readlane_b32 s50, v22, 19 -; GFX12-NEXT: v_readlane_b32 s49, v22, 18 -; GFX12-NEXT: v_readlane_b32 s48, v22, 17 -; GFX12-NEXT: v_readlane_b32 s47, v22, 16 -; GFX12-NEXT: v_readlane_b32 s46, v22, 15 -; GFX12-NEXT: v_readlane_b32 s45, v22, 14 -; GFX12-NEXT: v_readlane_b32 s44, v22, 13 -; GFX12-NEXT: v_readlane_b32 s43, v22, 12 -; GFX12-NEXT: v_readlane_b32 s42, v22, 11 -; GFX12-NEXT: v_readlane_b32 s41, v22, 10 -; GFX12-NEXT: v_readlane_b32 s40, v22, 9 -; GFX12-NEXT: v_readlane_b32 s39, v22, 8 -; GFX12-NEXT: v_readlane_b32 s38, v22, 7 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_readlane_b32 s53, v22, 14 +; GFX12-NEXT: v_readlane_b32 s52, v22, 13 +; GFX12-NEXT: v_readlane_b32 s51, v22, 12 +; GFX12-NEXT: v_readlane_b32 s50, v22, 11 +; GFX12-NEXT: v_readlane_b32 s49, v22, 10 +; GFX12-NEXT: v_readlane_b32 s48, v22, 9 +; GFX12-NEXT: v_readlane_b32 s47, v22, 8 +; GFX12-NEXT: v_readlane_b32 s46, v22, 7 ; GFX12-NEXT: v_readlane_b32 s37, v22, 6 ; GFX12-NEXT: v_readlane_b32 s36, v22, 5 ; GFX12-NEXT: v_readlane_b32 s35, v22, 4 diff --git a/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll b/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll index 0112453e32bfc..790b934c2b1bf 100644 --- a/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll +++ b/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll @@ -8,7 +8,7 @@ ; CHECK-LABEL: I_Quit: ; CHECK: .set I_Quit.num_vgpr, max(41, amdgpu.max_num_vgpr) ; CHECK: .set I_Quit.num_agpr, max(0, amdgpu.max_num_agpr) -; CHECK: .set I_Quit.numbered_sgpr, max(48, amdgpu.max_num_sgpr) +; CHECK: .set I_Quit.numbered_sgpr, max(64, amdgpu.max_num_sgpr) ; CHECK: .set I_Quit.private_seg_size, 16 ; CHECK: .set I_Quit.uses_vcc, 1 ; CHECK: .set I_Quit.uses_flat_scratch, 1 @@ -80,7 +80,7 @@ define void @P_SetThingPosition() { ; CHECK-LABEL: P_SetupPsprites: ; CHECK: .set P_SetupPsprites.num_vgpr, max(41, amdgpu.max_num_vgpr) ; CHECK: .set P_SetupPsprites.num_agpr, max(0, amdgpu.max_num_agpr) -; CHECK: .set P_SetupPsprites.numbered_sgpr, max(48, amdgpu.max_num_sgpr) +; CHECK: .set P_SetupPsprites.numbered_sgpr, max(64, amdgpu.max_num_sgpr) ; CHECK: .set P_SetupPsprites.private_seg_size, 16 ; CHECK: .set P_SetupPsprites.uses_vcc, 1 ; CHECK: .set P_SetupPsprites.uses_flat_scratch, 1 @@ -110,7 +110,7 @@ define void @HU_Start() { ; CHECK-LABEL: P_SpawnPlayer: ; CHECK: .set P_SpawnPlayer.num_vgpr, max(43, G_PlayerReborn.num_vgpr, P_SetThingPosition.num_vgpr, P_SetupPsprites.num_vgpr, HU_Start.num_vgpr) ; CHECK: .set P_SpawnPlayer.num_agpr, max(0, G_PlayerReborn.num_agpr, P_SetThingPosition.num_agpr, P_SetupPsprites.num_agpr, HU_Start.num_agpr) -; CHECK: .set P_SpawnPlayer.numbered_sgpr, max(60, G_PlayerReborn.numbered_sgpr, P_SetThingPosition.numbered_sgpr, P_SetupPsprites.numbered_sgpr, HU_Start.numbered_sgpr) +; CHECK: .set P_SpawnPlayer.numbered_sgpr, max(84, G_PlayerReborn.numbered_sgpr, P_SetThingPosition.numbered_sgpr, P_SetupPsprites.numbered_sgpr, HU_Start.numbered_sgpr) ; CHECK: .set P_SpawnPlayer.private_seg_size, 16+(max(G_PlayerReborn.private_seg_size, P_SetThingPosition.private_seg_size, P_SetupPsprites.private_seg_size, HU_Start.private_seg_size)) ; CHECK: .set P_SpawnPlayer.uses_vcc, or(1, G_PlayerReborn.uses_vcc, P_SetThingPosition.uses_vcc, P_SetupPsprites.uses_vcc, HU_Start.uses_vcc) ; CHECK: .set P_SpawnPlayer.uses_flat_scratch, or(0, G_PlayerReborn.uses_flat_scratch, P_SetThingPosition.uses_flat_scratch, P_SetupPsprites.uses_flat_scratch, HU_Start.uses_flat_scratch) @@ -128,7 +128,7 @@ define void @P_SpawnPlayer() { ; CHECK-LABEL: I_Error: ; CHECK: .set I_Error.num_vgpr, max(41, amdgpu.max_num_vgpr) ; CHECK: .set I_Error.num_agpr, max(0, amdgpu.max_num_agpr) -; CHECK: .set I_Error.numbered_sgpr, max(48, amdgpu.max_num_sgpr) +; CHECK: .set I_Error.numbered_sgpr, max(64, amdgpu.max_num_sgpr) ; CHECK: .set I_Error.private_seg_size, 16 ; CHECK: .set I_Error.uses_vcc, 1 ; CHECK: .set I_Error.uses_flat_scratch, 1 @@ -144,7 +144,7 @@ define void @I_Error(...) { ; CHECK-LABEL: G_DoReborn: ; CHECK: .set G_DoReborn.num_vgpr, max(44, P_RemoveMobj.num_vgpr, P_SpawnMobj.num_vgpr, P_SpawnPlayer.num_vgpr, I_Error.num_vgpr) ; CHECK: .set G_DoReborn.num_agpr, max(0, P_RemoveMobj.num_agpr, P_SpawnMobj.num_agpr, P_SpawnPlayer.num_agpr, I_Error.num_agpr) -; CHECK: .set G_DoReborn.numbered_sgpr, max(72, P_RemoveMobj.numbered_sgpr, P_SpawnMobj.numbered_sgpr, P_SpawnPlayer.numbered_sgpr, I_Error.numbered_sgpr) +; CHECK: .set G_DoReborn.numbered_sgpr, max(104, P_RemoveMobj.numbered_sgpr, P_SpawnMobj.numbered_sgpr, P_SpawnPlayer.numbered_sgpr, I_Error.numbered_sgpr) ; CHECK: .set G_DoReborn.private_seg_size, 32+(max(P_RemoveMobj.private_seg_size, P_SpawnMobj.private_seg_size, P_SpawnPlayer.private_seg_size, I_Error.private_seg_size)) ; CHECK: .set G_DoReborn.uses_vcc, or(1, P_RemoveMobj.uses_vcc, P_SpawnMobj.uses_vcc, P_SpawnPlayer.uses_vcc, I_Error.uses_vcc) ; CHECK: .set G_DoReborn.uses_flat_scratch, or(0, P_RemoveMobj.uses_flat_scratch, P_SpawnMobj.uses_flat_scratch, P_SpawnPlayer.uses_flat_scratch, I_Error.uses_flat_scratch) @@ -218,7 +218,7 @@ define void @F_Ticker() { ; CHECK-LABEL: G_CheckDemoStatus: ; CHECK: .set G_CheckDemoStatus.num_vgpr, max(43, I_Quit.num_vgpr, D_AdvanceDemo.num_vgpr, I_Error.num_vgpr) ; CHECK: .set G_CheckDemoStatus.num_agpr, max(0, I_Quit.num_agpr, D_AdvanceDemo.num_agpr, I_Error.num_agpr) -; CHECK: .set G_CheckDemoStatus.numbered_sgpr, max(60, I_Quit.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, I_Error.numbered_sgpr) +; CHECK: .set G_CheckDemoStatus.numbered_sgpr, max(84, I_Quit.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, I_Error.numbered_sgpr) ; CHECK: .set G_CheckDemoStatus.private_seg_size, 32+(max(I_Quit.private_seg_size, D_AdvanceDemo.private_seg_size, I_Error.private_seg_size)) ; CHECK: .set G_CheckDemoStatus.uses_vcc, or(1, I_Quit.uses_vcc, D_AdvanceDemo.uses_vcc, I_Error.uses_vcc) ; CHECK: .set G_CheckDemoStatus.uses_flat_scratch, or(0, I_Quit.uses_flat_scratch, D_AdvanceDemo.uses_flat_scratch, I_Error.uses_flat_scratch) @@ -264,7 +264,7 @@ define ptr @P_SaveGameFile() { ; CHECK-LABEL: R_FlatNumForName: ; CHECK: .set R_FlatNumForName.num_vgpr, max(42, I_Error.num_vgpr) ; CHECK: .set R_FlatNumForName.num_agpr, max(0, I_Error.num_agpr) -; CHECK: .set R_FlatNumForName.numbered_sgpr, max(48, I_Error.numbered_sgpr) +; CHECK: .set R_FlatNumForName.numbered_sgpr, max(64, I_Error.numbered_sgpr) ; CHECK: .set R_FlatNumForName.private_seg_size, 16+(max(I_Error.private_seg_size)) ; CHECK: .set R_FlatNumForName.uses_vcc, or(1, I_Error.uses_vcc) ; CHECK: .set R_FlatNumForName.uses_flat_scratch, or(0, I_Error.uses_flat_scratch) @@ -279,7 +279,7 @@ define i32 @R_FlatNumForName() { ; CHECK-LABEL: R_TextureNumForName: ; CHECK: .set R_TextureNumForName.num_vgpr, max(42, R_FlatNumForName.num_vgpr) ; CHECK: .set R_TextureNumForName.num_agpr, max(0, R_FlatNumForName.num_agpr) -; CHECK: .set R_TextureNumForName.numbered_sgpr, max(48, R_FlatNumForName.numbered_sgpr) +; CHECK: .set R_TextureNumForName.numbered_sgpr, max(64, R_FlatNumForName.numbered_sgpr) ; CHECK: .set R_TextureNumForName.private_seg_size, 16+(max(R_FlatNumForName.private_seg_size)) ; CHECK: .set R_TextureNumForName.uses_vcc, or(1, R_FlatNumForName.uses_vcc) ; CHECK: .set R_TextureNumForName.uses_flat_scratch, or(0, R_FlatNumForName.uses_flat_scratch) @@ -292,10 +292,10 @@ define i32 @R_TextureNumForName() { } ; CHECK-LABEL: G_Ticker: -; CHECK: .set G_Ticker.num_vgpr, max(46, G_DoReborn.num_vgpr, F_Ticker.num_vgpr, AM_Stop.num_vgpr, F_StartFinale.num_vgpr, D_AdvanceDemo.num_vgpr, R_FlatNumForName.num_vgpr, R_TextureNumForName.num_vgpr, P_TempSaveGameFile.num_vgpr, P_SaveGameFile.num_vgpr, I_Error.num_vgpr) +; CHECK: .set G_Ticker.num_vgpr, max(47, G_DoReborn.num_vgpr, F_Ticker.num_vgpr, AM_Stop.num_vgpr, F_StartFinale.num_vgpr, D_AdvanceDemo.num_vgpr, R_FlatNumForName.num_vgpr, R_TextureNumForName.num_vgpr, P_TempSaveGameFile.num_vgpr, P_SaveGameFile.num_vgpr, I_Error.num_vgpr) ; CHECK: .set G_Ticker.num_agpr, max(0, G_DoReborn.num_agpr, F_Ticker.num_agpr, AM_Stop.num_agpr, F_StartFinale.num_agpr, D_AdvanceDemo.num_agpr, R_FlatNumForName.num_agpr, R_TextureNumForName.num_agpr, P_TempSaveGameFile.num_agpr, P_SaveGameFile.num_agpr, I_Error.num_agpr) -; CHECK: .set G_Ticker.numbered_sgpr, max(84, G_DoReborn.numbered_sgpr, F_Ticker.numbered_sgpr, AM_Stop.numbered_sgpr, F_StartFinale.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, R_FlatNumForName.numbered_sgpr, R_TextureNumForName.numbered_sgpr, P_TempSaveGameFile.numbered_sgpr, P_SaveGameFile.numbered_sgpr, I_Error.numbered_sgpr) -; CHECK: .set G_Ticker.private_seg_size, 32+(max(G_DoReborn.private_seg_size, F_Ticker.private_seg_size, AM_Stop.private_seg_size, F_StartFinale.private_seg_size, D_AdvanceDemo.private_seg_size, R_FlatNumForName.private_seg_size, R_TextureNumForName.private_seg_size, P_TempSaveGameFile.private_seg_size, P_SaveGameFile.private_seg_size, I_Error.private_seg_size)) +; CHECK: .set G_Ticker.numbered_sgpr, max(105, G_DoReborn.numbered_sgpr, F_Ticker.numbered_sgpr, AM_Stop.numbered_sgpr, F_StartFinale.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, R_FlatNumForName.numbered_sgpr, R_TextureNumForName.numbered_sgpr, P_TempSaveGameFile.numbered_sgpr, P_SaveGameFile.numbered_sgpr, I_Error.numbered_sgpr) +; CHECK: .set G_Ticker.private_seg_size, 48+(max(G_DoReborn.private_seg_size, F_Ticker.private_seg_size, AM_Stop.private_seg_size, F_StartFinale.private_seg_size, D_AdvanceDemo.private_seg_size, R_FlatNumForName.private_seg_size, R_TextureNumForName.private_seg_size, P_TempSaveGameFile.private_seg_size, P_SaveGameFile.private_seg_size, I_Error.private_seg_size)) ; CHECK: .set G_Ticker.uses_vcc, or(1, G_DoReborn.uses_vcc, F_Ticker.uses_vcc, AM_Stop.uses_vcc, F_StartFinale.uses_vcc, D_AdvanceDemo.uses_vcc, R_FlatNumForName.uses_vcc, R_TextureNumForName.uses_vcc, P_TempSaveGameFile.uses_vcc, P_SaveGameFile.uses_vcc, I_Error.uses_vcc) ; CHECK: .set G_Ticker.uses_flat_scratch, or(0, G_DoReborn.uses_flat_scratch, F_Ticker.uses_flat_scratch, AM_Stop.uses_flat_scratch, F_StartFinale.uses_flat_scratch, D_AdvanceDemo.uses_flat_scratch, R_FlatNumForName.uses_flat_scratch, R_TextureNumForName.uses_flat_scratch, P_TempSaveGameFile.uses_flat_scratch, P_SaveGameFile.uses_flat_scratch, I_Error.uses_flat_scratch) ; CHECK: .set G_Ticker.has_dyn_sized_stack, or(0, G_DoReborn.has_dyn_sized_stack, F_Ticker.has_dyn_sized_stack, AM_Stop.has_dyn_sized_stack, F_StartFinale.has_dyn_sized_stack, D_AdvanceDemo.has_dyn_sized_stack, R_FlatNumForName.has_dyn_sized_stack, R_TextureNumForName.has_dyn_sized_stack, P_TempSaveGameFile.has_dyn_sized_stack, P_SaveGameFile.has_dyn_sized_stack, I_Error.has_dyn_sized_stack) @@ -316,9 +316,9 @@ define void @G_Ticker() { } ; CHECK-LABEL: RunTic: -; CHECK: .set RunTic.num_vgpr, max(46, G_CheckDemoStatus.num_vgpr, D_AdvanceDemo.num_vgpr, G_Ticker.num_vgpr) +; CHECK: .set RunTic.num_vgpr, max(47, G_CheckDemoStatus.num_vgpr, D_AdvanceDemo.num_vgpr, G_Ticker.num_vgpr) ; CHECK: .set RunTic.num_agpr, max(0, G_CheckDemoStatus.num_agpr, D_AdvanceDemo.num_agpr, G_Ticker.num_agpr) -; CHECK: .set RunTic.numbered_sgpr, max(84, G_CheckDemoStatus.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, G_Ticker.numbered_sgpr) +; CHECK: .set RunTic.numbered_sgpr, max(105, G_CheckDemoStatus.numbered_sgpr, D_AdvanceDemo.numbered_sgpr, G_Ticker.numbered_sgpr) ; CHECK: .set RunTic.private_seg_size, 32+(max(G_CheckDemoStatus.private_seg_size, D_AdvanceDemo.private_seg_size, G_Ticker.private_seg_size)) ; CHECK: .set RunTic.uses_vcc, or(1, G_CheckDemoStatus.uses_vcc, D_AdvanceDemo.uses_vcc, G_Ticker.uses_vcc) ; CHECK: .set RunTic.uses_flat_scratch, or(0, G_CheckDemoStatus.uses_flat_scratch, D_AdvanceDemo.uses_flat_scratch, G_Ticker.uses_flat_scratch) diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir index ba6524caf668d..8e957c1c31013 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -27,39 +27,25 @@ body: | liveins: $vgpr1 ; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs - ; CHECK: liveins: $vgpr1, $vgpr2 + ; CHECK: liveins: $sgpr38, $sgpr39, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $sgpr4 = COPY $sgpr33 + ; CHECK-NEXT: $sgpr38 = frame-setup COPY $sgpr33 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc - ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2 - ; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2 + ; CHECK-NEXT: $sgpr39 = frame-setup COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc - ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc - ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr33 - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 16384, implicit-def $scc - ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33 - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 + ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; CHECK-NEXT: $sgpr40 = S_MOV_B32 8192 + ; CHECK-NEXT: $vgpr0, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr0, 0, implicit $exec + ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; CHECK-NEXT: $sgpr40 = S_MOV_B32 16384 + ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec + ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0 - ; CHECK-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1 - ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc - ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; CHECK-NEXT: $sgpr33 = COPY $sgpr4 + ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr39 + ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr38 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec @@ -89,36 +75,24 @@ body: | liveins: $vgpr1 ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr - ; CHECK: liveins: $sgpr29, $vgpr1, $vgpr2 + ; CHECK: liveins: $sgpr29, $sgpr38, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sgpr29 = frame-setup COPY $sgpr33 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc - ; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 0, undef $vgpr2 + ; CHECK-NEXT: $sgpr38 = frame-setup COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc - ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc - ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr33 - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 16384, implicit-def $scc - ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33 - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31 + ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; CHECK-NEXT: $sgpr40 = S_MOV_B32 8192 + ; CHECK-NEXT: $vgpr0, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr0, 0, implicit $exec + ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; CHECK-NEXT: $sgpr40 = S_MOV_B32 16384 + ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec + ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; CHECK-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0 - ; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc - ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr38 ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr29 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -158,16 +132,12 @@ body: | ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc - ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc - ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr33 - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 16384, implicit-def $scc - ; CHECK-NEXT: $vgpr2 = COPY killed $sgpr33 - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc + ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; CHECK-NEXT: $sgpr38 = S_MOV_B32 8192 + ; CHECK-NEXT: $vgpr0, dead $sgpr38_sgpr39 = V_ADD_CO_U32_e64 killed $sgpr38, killed $vgpr0, 0, implicit $exec + ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; CHECK-NEXT: $sgpr38 = S_MOV_B32 16384 + ; CHECK-NEXT: $vgpr2, dead $sgpr38_sgpr39 = V_ADD_CO_U32_e64 killed $sgpr38, killed $vgpr2, 0, implicit $exec ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr29 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir index 162d12f651d4a..88556040486e2 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir @@ -23,64 +23,42 @@ body: | liveins: $vgpr1 ; MUBUF-LABEL: name: scavenge_sgpr_pei_no_sgprs - ; MUBUF: liveins: $vgpr1, $vgpr2 + ; MUBUF: liveins: $sgpr38, $sgpr39, $vgpr1 ; MUBUF-NEXT: {{ $}} - ; MUBUF-NEXT: $sgpr4 = COPY $sgpr33 + ; MUBUF-NEXT: $sgpr38 = frame-setup COPY $sgpr33 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc - ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc - ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; MUBUF-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2 - ; MUBUF-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2 + ; MUBUF-NEXT: $sgpr39 = frame-setup COPY $sgpr34 ; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; MUBUF-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec - ; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec - ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 + ; MUBUF-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; MUBUF-NEXT: $vgpr2 = V_ADD_U32_e32 16384, killed $vgpr2, implicit $exec + ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; MUBUF-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0 - ; MUBUF-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1 - ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc - ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; MUBUF-NEXT: $sgpr33 = COPY $sgpr4 + ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr39 + ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr38 ; MUBUF-NEXT: S_ENDPGM 0, implicit $vcc ; ; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs - ; FLATSCR: liveins: $vgpr1, $vgpr2 + ; FLATSCR: liveins: $sgpr38, $sgpr39, $vgpr1 ; FLATSCR-NEXT: {{ $}} - ; FLATSCR-NEXT: $sgpr4 = COPY $sgpr33 + ; FLATSCR-NEXT: $sgpr38 = frame-setup COPY $sgpr33 ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc - ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc - ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) - ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2 - ; FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2 + ; FLATSCR-NEXT: $sgpr39 = frame-setup COPY $sgpr34 ; FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc - ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc - ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec - ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, -8192, implicit-def $scc - ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc - ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 - ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, -16384, implicit-def $scc + ; FLATSCR-NEXT: $sgpr40 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc + ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr40, implicit $exec + ; FLATSCR-NEXT: $sgpr40 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc + ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr40, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; FLATSCR-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; FLATSCR-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0 - ; FLATSCR-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1 - ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc - ; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) - ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; FLATSCR-NEXT: $sgpr33 = COPY $sgpr4 + ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr39 + ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr38 ; FLATSCR-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir index a4f936a4d705c..1242e23db6c6a 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir @@ -22,34 +22,22 @@ body: | liveins: $vgpr1 ; CHECK-LABEL: name: scavenge_sgpr_pei - ; CHECK: liveins: $vgpr1, $vgpr2 + ; CHECK: liveins: $sgpr38, $sgpr39, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $sgpr4 = COPY $sgpr33 + ; CHECK-NEXT: $sgpr38 = frame-setup COPY $sgpr33 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc - ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 262400, implicit-def dead $scc - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2 - ; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2 + ; CHECK-NEXT: $sgpr39 = frame-setup COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 786432, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc - ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 4096, implicit-def $scc - ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33 - ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -4096, implicit-def $scc - ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc - ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 + ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; CHECK-NEXT: $sgpr40 = S_MOV_B32 4096 + ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec + ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0 - ; CHECK-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1 - ; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 262400, implicit-def dead $scc - ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; CHECK-NEXT: $sgpr33 = COPY $sgpr4 + ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr39 + ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr38 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_OR_B32_e32 %stack.0, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll index 33720ea9b28e6..9be182a767685 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -10,20 +10,20 @@ declare i64 @_Z13get_global_idj(i32) #0 define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; GFX8-LABEL: clmem_read_simplified: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s38, -1 -; GFX8-NEXT: s_mov_b32 s39, 0xe80000 -; GFX8-NEXT: s_add_u32 s36, s36, s11 -; GFX8-NEXT: s_addc_u32 s37, s37, 0 +; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s50, -1 +; GFX8-NEXT: s_mov_b32 s51, 0xe80000 +; GFX8-NEXT: s_add_u32 s48, s48, s11 +; GFX8-NEXT: s_addc_u32 s49, s49, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -91,20 +91,20 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; ; GFX9-LABEL: clmem_read_simplified: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -161,12 +161,12 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; ; GFX10-LABEL: clmem_read_simplified: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s38, -1 -; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX10-NEXT: s_add_u32 s36, s36, s11 -; GFX10-NEXT: s_addc_u32 s37, s37, 0 +; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s50, -1 +; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX10-NEXT: s_add_u32 s48, s48, s11 +; GFX10-NEXT: s_addc_u32 s49, s49, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -174,8 +174,8 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] @@ -342,20 +342,20 @@ entry: define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX8-LABEL: clmem_read: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s38, -1 -; GFX8-NEXT: s_mov_b32 s39, 0xe80000 -; GFX8-NEXT: s_add_u32 s36, s36, s11 -; GFX8-NEXT: s_addc_u32 s37, s37, 0 +; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s50, -1 +; GFX8-NEXT: s_mov_b32 s51, 0xe80000 +; GFX8-NEXT: s_add_u32 s48, s48, s11 +; GFX8-NEXT: s_addc_u32 s49, s49, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -469,20 +469,20 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; ; GFX900-LABEL: clmem_read: ; GFX900: ; %bb.0: ; %entry -; GFX900-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX900-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX900-NEXT: s_mov_b32 s38, -1 -; GFX900-NEXT: s_mov_b32 s39, 0xe00000 -; GFX900-NEXT: s_add_u32 s36, s36, s11 -; GFX900-NEXT: s_addc_u32 s37, s37, 0 +; GFX900-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX900-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX900-NEXT: s_mov_b32 s50, -1 +; GFX900-NEXT: s_mov_b32 s51, 0xe00000 +; GFX900-NEXT: s_add_u32 s48, s48, s11 +; GFX900-NEXT: s_addc_u32 s49, s49, 0 ; GFX900-NEXT: s_getpc_b64 s[0:1] ; GFX900-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX900-NEXT: v_mov_b32_e32 v31, v0 ; GFX900-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX900-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX900-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX900-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX900-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX900-NEXT: v_mov_b32_e32 v0, 0 ; GFX900-NEXT: s_mov_b32 s32, 0 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) @@ -586,12 +586,12 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; ; GFX10-LABEL: clmem_read: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s38, -1 -; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX10-NEXT: s_add_u32 s36, s36, s11 -; GFX10-NEXT: s_addc_u32 s37, s37, 0 +; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s50, -1 +; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX10-NEXT: s_add_u32 s48, s48, s11 +; GFX10-NEXT: s_addc_u32 s49, s49, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -599,8 +599,8 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] @@ -698,20 +698,20 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; ; GFX90A-LABEL: clmem_read: ; GFX90A: ; %bb.0: ; %entry -; GFX90A-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX90A-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX90A-NEXT: s_mov_b32 s38, -1 -; GFX90A-NEXT: s_mov_b32 s39, 0xe00000 -; GFX90A-NEXT: s_add_u32 s36, s36, s11 -; GFX90A-NEXT: s_addc_u32 s37, s37, 0 +; GFX90A-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX90A-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX90A-NEXT: s_mov_b32 s50, -1 +; GFX90A-NEXT: s_mov_b32 s51, 0xe00000 +; GFX90A-NEXT: s_add_u32 s48, s48, s11 +; GFX90A-NEXT: s_addc_u32 s49, s49, 0 ; GFX90A-NEXT: s_getpc_b64 s[0:1] ; GFX90A-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX90A-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX90A-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX90A-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX90A-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX90A-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX90A-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_mov_b32 s32, 0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) @@ -1030,20 +1030,20 @@ while.end: ; preds = %while.cond.loopexit define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; GFX8-LABEL: Address32: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s38, -1 -; GFX8-NEXT: s_mov_b32 s39, 0xe80000 -; GFX8-NEXT: s_add_u32 s36, s36, s11 -; GFX8-NEXT: s_addc_u32 s37, s37, 0 +; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s50, -1 +; GFX8-NEXT: s_mov_b32 s51, 0xe80000 +; GFX8-NEXT: s_add_u32 s48, s48, s11 +; GFX8-NEXT: s_addc_u32 s49, s49, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -1116,20 +1116,20 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; ; GFX9-LABEL: Address32: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1173,12 +1173,12 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; ; GFX10-LABEL: Address32: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s38, -1 -; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX10-NEXT: s_add_u32 s36, s36, s11 -; GFX10-NEXT: s_addc_u32 s37, s37, 0 +; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s50, -1 +; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX10-NEXT: s_add_u32 s48, s48, s11 +; GFX10-NEXT: s_addc_u32 s49, s49, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -1186,8 +1186,8 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] @@ -1345,20 +1345,20 @@ entry: define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) { ; GFX8-LABEL: Offset64: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s38, -1 -; GFX8-NEXT: s_mov_b32 s39, 0xe80000 -; GFX8-NEXT: s_add_u32 s36, s36, s11 -; GFX8-NEXT: s_addc_u32 s37, s37, 0 +; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s50, -1 +; GFX8-NEXT: s_mov_b32 s51, 0xe80000 +; GFX8-NEXT: s_add_u32 s48, s48, s11 +; GFX8-NEXT: s_addc_u32 s49, s49, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -1397,20 +1397,20 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) { ; ; GFX9-LABEL: Offset64: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1446,12 +1446,12 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) { ; ; GFX10-LABEL: Offset64: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s38, -1 -; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX10-NEXT: s_add_u32 s36, s36, s11 -; GFX10-NEXT: s_addc_u32 s37, s37, 0 +; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s50, -1 +; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX10-NEXT: s_add_u32 s48, s48, s11 +; GFX10-NEXT: s_addc_u32 s49, s49, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -1459,8 +1459,8 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] @@ -1569,20 +1569,20 @@ entry: define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) { ; GFX8-LABEL: p32Offset64: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s38, -1 -; GFX8-NEXT: s_mov_b32 s39, 0xe80000 -; GFX8-NEXT: s_add_u32 s36, s36, s11 -; GFX8-NEXT: s_addc_u32 s37, s37, 0 +; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s50, -1 +; GFX8-NEXT: s_mov_b32 s51, 0xe80000 +; GFX8-NEXT: s_add_u32 s48, s48, s11 +; GFX8-NEXT: s_addc_u32 s49, s49, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -1619,20 +1619,20 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) { ; ; GFX9-LABEL: p32Offset64: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1664,12 +1664,12 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) { ; ; GFX10-LABEL: p32Offset64: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s38, -1 -; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX10-NEXT: s_add_u32 s36, s36, s11 -; GFX10-NEXT: s_addc_u32 s37, s37, 0 +; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s50, -1 +; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX10-NEXT: s_add_u32 s48, s48, s11 +; GFX10-NEXT: s_addc_u32 s49, s49, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -1677,8 +1677,8 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] @@ -1776,31 +1776,31 @@ entry: define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX8-LABEL: DiffBase: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s42, -1 -; GFX8-NEXT: s_mov_b32 s43, 0xe80000 -; GFX8-NEXT: s_add_u32 s40, s40, s11 -; GFX8-NEXT: s_addc_u32 s41, s41, 0 +; GFX8-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s66, -1 +; GFX8-NEXT: s_mov_b32 s67, 0xe80000 +; GFX8-NEXT: s_add_u32 s64, s64, s11 +; GFX8-NEXT: s_addc_u32 s65, s65, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 -; GFX8-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24 +; GFX8-NEXT: s_load_dwordx4 s[48:51], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[40:41] +; GFX8-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX8-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 7, v0 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffff8000, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s37 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s36, v2 +; GFX8-NEXT: v_mov_b32_e32 v1, s49 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, s48, v2 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: v_mov_b32_e32 v3, s39 -; GFX8-NEXT: v_add_u32_e32 v12, vcc, s38, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s51 +; GFX8-NEXT: v_add_u32_e32 v12, vcc, s50, v2 ; GFX8-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x1000, v0 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc @@ -1839,31 +1839,31 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; ; GFX9-LABEL: DiffBase: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s42, -1 -; GFX9-NEXT: s_mov_b32 s43, 0xe00000 -; GFX9-NEXT: s_add_u32 s40, s40, s11 -; GFX9-NEXT: s_addc_u32 s41, s41, 0 +; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s66, -1 +; GFX9-NEXT: s_mov_b32 s67, 0xe00000 +; GFX9-NEXT: s_add_u32 s64, s64, s11 +; GFX9-NEXT: s_addc_u32 s65, s65, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[48:51], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX9-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 7, v0 ; GFX9-NEXT: v_and_b32_e32 v16, 0xffff8000, v0 -; GFX9-NEXT: v_mov_b32_e32 v0, s37 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s36, v16 +; GFX9-NEXT: v_mov_b32_e32 v0, s49 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s48, v16 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v0, vcc -; GFX9-NEXT: v_mov_b32_e32 v0, s39 -; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, s38, v16 +; GFX9-NEXT: v_mov_b32_e32 v0, s51 +; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, s50, v16 ; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v0, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc @@ -1893,35 +1893,35 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v15, v3, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-NEXT: global_store_dwordx2 v16, v[0:1], s[36:37] +; GFX9-NEXT: global_store_dwordx2 v16, v[0:1], s[48:49] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: DiffBase: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s42, -1 -; GFX10-NEXT: s_mov_b32 s43, 0x31c16000 -; GFX10-NEXT: s_add_u32 s40, s40, s11 -; GFX10-NEXT: s_addc_u32 s41, s41, 0 +; GFX10-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s66, -1 +; GFX10-NEXT: s_mov_b32 s67, 0x31c16000 +; GFX10-NEXT: s_add_u32 s64, s64, s11 +; GFX10-NEXT: s_addc_u32 s65, s65, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX10-NEXT: v_mov_b32_e32 v31, v0 ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 -; GFX10-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[48:51], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[40:41] -; GFX10-NEXT: s_mov_b64 s[2:3], s[42:43] +; GFX10-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX10-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 7, v0 ; GFX10-NEXT: v_and_b32_e32 v16, 0xffff8000, v0 -; GFX10-NEXT: v_add_co_u32 v8, s0, s36, v16 -; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s0, s37, 0, s0 -; GFX10-NEXT: v_add_co_u32 v12, s0, s38, v16 -; GFX10-NEXT: v_add_co_ci_u32_e64 v13, s0, s39, 0, s0 +; GFX10-NEXT: v_add_co_u32 v8, s0, s48, v16 +; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s0, s49, 0, s0 +; GFX10-NEXT: v_add_co_u32 v12, s0, s50, v16 +; GFX10-NEXT: v_add_co_ci_u32_e64 v13, s0, s51, 0, s0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v8, 0x1800 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v9, vcc_lo ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v12, 0x3000 @@ -1952,7 +1952,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v15, v3, vcc_lo ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-NEXT: global_store_dwordx2 v16, v[0:1], s[36:37] +; GFX10-NEXT: global_store_dwordx2 v16, v[0:1], s[48:49] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: DiffBase: @@ -1962,21 +1962,21 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX11-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX11-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: s_load_b128 s[36:39], s[4:5], 0x24 +; GFX11-NEXT: s_load_b128 s[48:51], s[4:5], 0x24 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 7, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v12, 0xffff8000, v0 -; GFX11-NEXT: v_add_co_u32 v2, s0, s36, v12 +; GFX11-NEXT: v_add_co_u32 v2, s0, s48, v12 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, s37, 0, s0 -; GFX11-NEXT: v_add_co_u32 v8, s0, s38, v12 +; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, s49, 0, s0 +; GFX11-NEXT: v_add_co_u32 v8, s0, s50, v12 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo -; GFX11-NEXT: v_add_co_ci_u32_e64 v9, null, s39, 0, s0 +; GFX11-NEXT: v_add_co_ci_u32_e64 v9, null, s51, 0, s0 ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, 0x2000 ; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, 0x2000, v8 @@ -2005,7 +2005,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX11-NEXT: global_store_b64 v12, v[0:1], s[36:37] +; GFX11-NEXT: global_store_b64 v12, v[0:1], s[48:49] ; GFX11-NEXT: s_endpgm ptr addrspace(1) %buffer2) { entry: @@ -2046,20 +2046,20 @@ entry: define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; GFX8-LABEL: ReverseOrder: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s38, -1 -; GFX8-NEXT: s_mov_b32 s39, 0xe80000 -; GFX8-NEXT: s_add_u32 s36, s36, s11 -; GFX8-NEXT: s_addc_u32 s37, s37, 0 +; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s50, -1 +; GFX8-NEXT: s_mov_b32 s51, 0xe80000 +; GFX8-NEXT: s_add_u32 s48, s48, s11 +; GFX8-NEXT: s_addc_u32 s49, s49, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -2127,20 +2127,20 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; ; GFX9-LABEL: ReverseOrder: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -2196,12 +2196,12 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; ; GFX10-LABEL: ReverseOrder: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s38, -1 -; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX10-NEXT: s_add_u32 s36, s36, s11 -; GFX10-NEXT: s_addc_u32 s37, s37, 0 +; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s50, -1 +; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX10-NEXT: s_add_u32 s48, s48, s11 +; GFX10-NEXT: s_addc_u32 s49, s49, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -2209,8 +2209,8 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] @@ -2382,20 +2382,20 @@ entry: define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buffer) { ; GFX8-LABEL: negativeoffset: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s38, -1 -; GFX8-NEXT: s_mov_b32 s39, 0xe80000 -; GFX8-NEXT: s_add_u32 s36, s36, s11 -; GFX8-NEXT: s_addc_u32 s37, s37, 0 +; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s50, -1 +; GFX8-NEXT: s_mov_b32 s51, 0xe80000 +; GFX8-NEXT: s_add_u32 s48, s48, s11 +; GFX8-NEXT: s_addc_u32 s49, s49, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -2423,20 +2423,20 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf ; ; GFX9-LABEL: negativeoffset: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s38, -1 -; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_add_u32 s36, s36, s11 -; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -2463,12 +2463,12 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf ; ; GFX10-LABEL: negativeoffset: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s38, -1 -; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 -; GFX10-NEXT: s_add_u32 s36, s36, s11 -; GFX10-NEXT: s_addc_u32 s37, s37, 0 +; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s50, -1 +; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX10-NEXT: s_add_u32 s48, s48, s11 +; GFX10-NEXT: s_addc_u32 s49, s49, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -2476,8 +2476,8 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] -; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] +; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir index c6ee557d970cd..814674804df57 100644 --- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir +++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir @@ -41,63 +41,91 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr34_sgpr35 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: renamable $sgpr36_sgpr37 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: renamable $sgpr38_sgpr39 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: renamable $sgpr40_sgpr41 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: renamable $sgpr60 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr42_sgpr43 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit $exec - ; CHECK-NEXT: renamable $sgpr44_sgpr45 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: renamable $sgpr61 = S_MOV_B32 1083786240 + ; CHECK-NEXT: renamable $sgpr18_sgpr19 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr100_sgpr101 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.3, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr56 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit $exec + ; CHECK-NEXT: renamable $sgpr34_sgpr35 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr57 = S_MOV_B32 1083786240 + ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5) ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.17(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr44_sgpr45, implicit-def dead $scc + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr34_sgpr35, implicit-def dead $scc ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_1024_align2 = COPY [[COPY]] ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.17 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.5(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr64 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr65 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr66 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr67 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr68 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr69 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr70 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr71 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr72 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr73 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr74 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr75 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr76 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr77 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr78 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr79 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr80 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr81 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr82 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr83 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr84 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr85 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr86 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr87 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr88 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr89 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr90 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr91 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr92 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr93 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr94 = COPY renamable $sgpr60 - ; CHECK-NEXT: renamable $sgpr95 = COPY renamable $sgpr60 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $exec + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr43 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr47 = COPY killed renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47 + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr80 = COPY killed renamable $sgpr52 + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47 = COPY killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79 + ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr80 + ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr49 = COPY killed renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr50 = COPY killed renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr51 = COPY killed renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr52 = COPY killed renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr53 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr54 = COPY killed renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr55 = COPY killed renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr56 = COPY killed renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr58 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr59 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr61 = COPY killed renamable $sgpr80 + ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr62 = COPY killed renamable $sgpr80 + ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr63 = COPY killed renamable $sgpr80 + ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr64 = COPY killed renamable $sgpr80 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr65 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr66 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr67 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} @@ -126,111 +154,117 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.12(0x40000000), %bb.6(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr12_sgpr13 = S_AND_B64 renamable $sgpr38_sgpr39, undef renamable $sgpr46_sgpr47, implicit-def dead $scc - ; CHECK-NEXT: renamable $sgpr46_sgpr47 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.3, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = S_AND_B64 killed renamable $sgpr12_sgpr13, undef renamable $sgpr62_sgpr63, implicit-def dead $scc + ; CHECK-NEXT: renamable $sgpr62_sgpr63 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr12_sgpr13 ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr40_sgpr41, implicit $exec + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5) + ; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr12_sgpr13, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x80000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr48_sgpr49 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: renamable $sgpr50_sgpr51 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: renamable $sgpr64_sgpr65 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: renamable $sgpr66_sgpr67 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec ; CHECK-NEXT: dead [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr14, 11, implicit-def $m0, implicit $m0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.9(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr48_sgpr49, implicit-def dead $scc + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr64_sgpr65, implicit-def dead $scc ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.10, implicit $vcc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.9: ; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.17(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY renamable $sgpr60_sgpr61, implicit $exec + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY killed renamable $sgpr84_sgpr85, implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, [[COPY2]], undef renamable $sgpr4_sgpr5, 0, 0, implicit $exec :: (store (s64), addrspace 1) - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr34_sgpr35, implicit $exec + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr18_sgpr19, implicit $exec ; CHECK-NEXT: dead renamable $sgpr12_sgpr13 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_1]], implicit $exec - ; CHECK-NEXT: renamable $sgpr58 = S_ADD_U32 renamable $sgpr8, 32, implicit-def dead $scc + ; CHECK-NEXT: renamable $sgpr82 = S_ADD_U32 renamable $sgpr8, 32, implicit-def dead $scc ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: renamable $sgpr52_sgpr53 = COPY killed renamable $sgpr4_sgpr5 - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY renamable $sgpr52_sgpr53 - ; CHECK-NEXT: renamable $sgpr54_sgpr55 = COPY killed renamable $sgpr6_sgpr7 - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr54_sgpr55 - ; CHECK-NEXT: renamable $sgpr56_sgpr57 = COPY killed renamable $sgpr10_sgpr11 - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY renamable $sgpr56_sgpr57 + ; CHECK-NEXT: renamable $sgpr68_sgpr69 = COPY killed renamable $sgpr4_sgpr5 + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY renamable $sgpr68_sgpr69 + ; CHECK-NEXT: renamable $sgpr78_sgpr79 = COPY killed renamable $sgpr6_sgpr7 + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr78_sgpr79 + ; CHECK-NEXT: renamable $sgpr80_sgpr81 = COPY killed renamable $sgpr10_sgpr11 + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY renamable $sgpr80_sgpr81 ; CHECK-NEXT: $sgpr12 = COPY renamable $sgpr14 ; CHECK-NEXT: $sgpr13 = COPY renamable $sgpr15 - ; CHECK-NEXT: renamable $sgpr62 = COPY killed renamable $sgpr8 + ; CHECK-NEXT: renamable $sgpr84 = COPY killed renamable $sgpr8 ; CHECK-NEXT: renamable $sgpr33 = COPY killed renamable $sgpr16 - ; CHECK-NEXT: renamable $sgpr59 = COPY killed renamable $sgpr15 - ; CHECK-NEXT: renamable $sgpr63 = COPY killed renamable $sgpr14 + ; CHECK-NEXT: renamable $sgpr83 = COPY killed renamable $sgpr15 + ; CHECK-NEXT: renamable $sgpr85 = COPY killed renamable $sgpr14 + ; CHECK-NEXT: renamable $sgpr36_sgpr37 = COPY killed renamable $sgpr18_sgpr19 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr58_sgpr59 + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr82_sgpr83 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9 - ; CHECK-NEXT: renamable $sgpr14 = COPY killed renamable $sgpr63 - ; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr59 + ; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr36_sgpr37 + ; CHECK-NEXT: renamable $sgpr14 = COPY killed renamable $sgpr85 + ; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr83 ; CHECK-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr33 - ; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY killed renamable $sgpr52_sgpr53 - ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr54_sgpr55 - ; CHECK-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr62 - ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr56_sgpr57 + ; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY killed renamable $sgpr68_sgpr69 + ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr78_sgpr79 + ; CHECK-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr80_sgpr81 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: $exec = S_MOV_B64_term renamable $sgpr50_sgpr51 + ; CHECK-NEXT: $exec = S_MOV_B64_term renamable $sgpr66_sgpr67 ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.10, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.17 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.10: ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.12(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.12 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.11: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.17(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.17 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.12: ; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.13(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr46_sgpr47 + ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr62_sgpr63 ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.13: ; CHECK-NEXT: successors: %bb.15(0x40000000), %bb.14(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr42_sgpr43, implicit-def dead $scc + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5) + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.15, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.14 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.14: ; CHECK-NEXT: successors: %bb.15(0x80000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.15: ; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.16(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr36_sgpr37, implicit-def dead $scc + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr100_sgpr101, implicit-def dead $scc ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.11, implicit $vcc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.16: diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll index 570ea4b7132aa..0d25bc97ff775 100644 --- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll +++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll @@ -246,10 +246,10 @@ bb3: ; CHECK-LABEL: {{^}}spill_func: ; GCN: NumSgprs: 104 ; GCN-GCNTRACKERS: NumSgprs: 104 -; GCN: NumVgprs: 3 -; GCN-GCNTRACKERS: NumVgprs: 4 -; GCN: ScratchSize: 12 -; GCN-GCNTRACKERS: ScratchSize: 16 +; GCN: NumVgprs: 2 +; GCN-GCNTRACKERS: NumVgprs: 3 +; GCN: ScratchSize: 8 +; GCN-GCNTRACKERS: ScratchSize: 12 define void @spill_func(ptr addrspace(1) %arg) #0 { entry: diff --git a/llvm/test/CodeGen/AMDGPU/select.f16.ll b/llvm/test/CodeGen/AMDGPU/select.f16.ll index d345b57d3d08b..4a65b0ec50484 100644 --- a/llvm/test/CodeGen/AMDGPU/select.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/select.f16.ll @@ -1903,19 +1903,14 @@ define <16 x half> @v_vselect_v16f16(<16 x half> %a, <16 x half> %b, <16 x i32> ; VI-LABEL: v_vselect_v16f16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: v_writelane_b32 v31, s30, 0 -; VI-NEXT: v_writelane_b32 v31, s31, 1 ; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 ; VI-NEXT: v_cmp_eq_u32_e64 s[18:19], 0, v17 -; VI-NEXT: v_cmp_eq_u32_e64 s[30:31], 0, v29 +; VI-NEXT: v_cmp_eq_u32_e64 s[38:39], 0, v29 ; VI-NEXT: v_lshrrev_b32_e32 v16, 16, v6 ; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v14 ; VI-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v18 ; VI-NEXT: v_cmp_eq_u32_e64 s[28:29], 0, v27 -; VI-NEXT: v_cndmask_b32_e64 v16, v17, v16, s[30:31] +; VI-NEXT: v_cndmask_b32_e64 v16, v17, v16, s[38:39] ; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v5 ; VI-NEXT: v_lshrrev_b32_e32 v18, 16, v13 ; VI-NEXT: v_cmp_eq_u32_e64 s[20:21], 0, v19 @@ -1957,8 +1952,6 @@ define <16 x half> @v_vselect_v16f16(<16 x half> %a, <16 x half> %b, <16 x i32> ; VI-NEXT: v_cmp_eq_u32_e64 s[16:17], 0, v28 ; VI-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[14:15] ; VI-NEXT: v_cndmask_b32_e64 v6, v14, v6, s[16:17] -; VI-NEXT: v_readlane_b32 s31, v31, 1 -; VI-NEXT: v_readlane_b32 s30, v31, 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 ; VI-NEXT: v_cndmask_b32_e32 v8, v10, v9, vcc @@ -1976,10 +1969,6 @@ define <16 x half> @v_vselect_v16f16(<16 x half> %a, <16 x half> %b, <16 x i32> ; VI-NEXT: v_lshlrev_b32_e32 v8, 16, v8 ; VI-NEXT: v_or_b32_sdwa v6, v6, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; VI-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; VI-NEXT: s_mov_b64 exec, s[4:5] -; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_vselect_v16f16: diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll index 47810346c50b7..634d077e41d37 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll @@ -9,15 +9,15 @@ declare void @foo() define amdgpu_kernel void @kernel() { ; GCN-LABEL: kernel: ; GCN: ; %bb.0: -; GCN-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; GCN-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; GCN-NEXT: s_mov_b32 s38, -1 +; GCN-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s50, -1 ; GCN-NEXT: ; implicit-def: $vgpr40 : SGPR spill to VGPR lane -; GCN-NEXT: s_mov_b32 s39, 0xe00000 +; GCN-NEXT: s_mov_b32 s51, 0xe00000 ; GCN-NEXT: v_writelane_b32 v40, s4, 0 -; GCN-NEXT: s_add_u32 s36, s36, s11 +; GCN-NEXT: s_add_u32 s48, s48, s11 ; GCN-NEXT: v_writelane_b32 v40, s5, 1 -; GCN-NEXT: s_addc_u32 s37, s37, 0 +; GCN-NEXT: s_addc_u32 s49, s49, 0 ; GCN-NEXT: s_mov_b64 s[4:5], s[0:1] ; GCN-NEXT: v_readlane_b32 s0, v40, 0 ; GCN-NEXT: s_mov_b32 s13, s9 @@ -34,9 +34,9 @@ define amdgpu_kernel void @kernel() { ; GCN-NEXT: s_mov_b64 s[6:7], s[2:3] ; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GCN-NEXT: s_mov_b64 s[0:1], s[36:37] +; GCN-NEXT: s_mov_b64 s[0:1], s[48:49] ; GCN-NEXT: v_or3_b32 v31, v0, v1, v2 -; GCN-NEXT: s_mov_b64 s[2:3], s[38:39] +; GCN-NEXT: s_mov_b64 s[2:3], s[50:51] ; GCN-NEXT: s_mov_b32 s32, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll index dc227f745aa9a..a93994b5d6e5d 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll @@ -15006,20 +15006,12 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND @@ -15033,20 +15025,12 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -15063,20 +15047,12 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND @@ -15090,20 +15066,12 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -15156,23 +15124,15 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -15183,20 +15143,12 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -15213,23 +15165,15 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -15240,20 +15184,12 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -16120,20 +16056,12 @@ define void @s_shuffle_v2i64_v8i64__10_0() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -16145,20 +16073,12 @@ define void @s_shuffle_v2i64_v8i64__10_0() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -16175,20 +16095,12 @@ define void @s_shuffle_v2i64_v8i64__10_0() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -16200,20 +16112,12 @@ define void @s_shuffle_v2i64_v8i64__10_0() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -16881,20 +16785,12 @@ define void @s_shuffle_v2i64_v8i64__10_1() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -16906,20 +16802,12 @@ define void @s_shuffle_v2i64_v8i64__10_1() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -16936,20 +16824,12 @@ define void @s_shuffle_v2i64_v8i64__10_1() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -16961,20 +16841,12 @@ define void @s_shuffle_v2i64_v8i64__10_1() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -20981,20 +20853,12 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND @@ -21008,20 +20872,12 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21038,20 +20894,12 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND @@ -21065,20 +20913,12 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21187,20 +21027,12 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND @@ -21214,20 +21046,12 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21244,20 +21068,12 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND @@ -21271,20 +21087,12 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21337,20 +21145,12 @@ define void @s_shuffle_v2i64_v8i64__12_6() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND @@ -21364,20 +21164,12 @@ define void @s_shuffle_v2i64_v8i64__12_6() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21394,20 +21186,12 @@ define void @s_shuffle_v2i64_v8i64__12_6() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND @@ -21421,20 +21205,12 @@ define void @s_shuffle_v2i64_v8i64__12_6() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21475,20 +21251,12 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND @@ -21502,20 +21270,12 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21532,20 +21292,12 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND @@ -21559,20 +21311,12 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21625,20 +21369,12 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND @@ -21652,20 +21388,12 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21682,20 +21410,12 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND @@ -21709,20 +21429,12 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22201,23 +21913,15 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -22228,20 +21932,12 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22258,23 +21954,15 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -22285,20 +21973,12 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22407,23 +22087,15 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -22434,20 +22106,12 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22464,23 +22128,15 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -22491,20 +22147,12 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22557,20 +22205,12 @@ define void @s_shuffle_v2i64_v8i64__12_7() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND @@ -22584,20 +22224,12 @@ define void @s_shuffle_v2i64_v8i64__12_7() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22614,20 +22246,12 @@ define void @s_shuffle_v2i64_v8i64__12_7() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND @@ -22641,20 +22265,12 @@ define void @s_shuffle_v2i64_v8i64__12_7() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22695,23 +22311,15 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -22722,20 +22330,12 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22752,23 +22352,15 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -22779,20 +22371,12 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22845,20 +22429,12 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND @@ -22872,20 +22448,12 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22902,20 +22470,12 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND @@ -22929,20 +22489,12 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -23859,20 +23411,12 @@ define void @s_shuffle_v2i64_v8i64__3_9() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -23884,20 +23428,12 @@ define void @s_shuffle_v2i64_v8i64__3_9() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -23914,20 +23450,12 @@ define void @s_shuffle_v2i64_v8i64__3_9() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -23939,20 +23467,12 @@ define void @s_shuffle_v2i64_v8i64__3_9() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -24164,23 +23684,15 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -24191,20 +23703,12 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -24221,23 +23725,15 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -24248,20 +23744,12 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -25040,23 +24528,15 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -25067,20 +24547,12 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -25097,23 +24569,15 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -25124,20 +24588,12 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -25190,20 +24646,12 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND @@ -25217,20 +24665,12 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -25247,20 +24687,12 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND @@ -25274,20 +24706,12 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -25735,20 +25159,12 @@ define void @s_shuffle_v2i64_v8i64__1_11() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -25760,20 +25176,12 @@ define void @s_shuffle_v2i64_v8i64__1_11() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -25790,20 +25198,12 @@ define void @s_shuffle_v2i64_v8i64__1_11() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -25815,20 +25215,12 @@ define void @s_shuffle_v2i64_v8i64__1_11() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -26152,23 +25544,15 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -26179,20 +25563,12 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -26209,23 +25585,15 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -26236,20 +25604,12 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -27028,23 +26388,15 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -27055,20 +26407,12 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -27085,23 +26429,15 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -27112,20 +26448,12 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -27178,20 +26506,12 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND @@ -27205,20 +26525,12 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -27235,20 +26547,12 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND @@ -27262,20 +26566,12 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -27987,23 +27283,15 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -28014,20 +27302,12 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -28044,23 +27324,15 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -28071,20 +27343,12 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -28137,20 +27401,12 @@ define void @s_shuffle_v2i64_v8i64__7_13() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -28164,20 +27420,12 @@ define void @s_shuffle_v2i64_v8i64__7_13() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -28194,20 +27442,12 @@ define void @s_shuffle_v2i64_v8i64__7_13() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -28221,20 +27461,12 @@ define void @s_shuffle_v2i64_v8i64__7_13() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -29009,23 +28241,15 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -29036,20 +28260,12 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -29066,23 +28282,15 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -29093,20 +28301,12 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -29159,20 +28359,12 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND @@ -29186,20 +28378,12 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -29216,20 +28400,12 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND @@ -29243,20 +28419,12 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -30055,23 +29223,15 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[4:19] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -30082,20 +29242,12 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -30112,23 +29264,15 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[4:19] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -30139,20 +29283,12 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -30205,20 +29341,12 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s38, 2 -; GFX900-NEXT: v_writelane_b32 v0, s39, 3 -; GFX900-NEXT: v_writelane_b32 v0, s40, 4 -; GFX900-NEXT: v_writelane_b32 v0, s41, 5 -; GFX900-NEXT: v_writelane_b32 v0, s42, 6 -; GFX900-NEXT: v_writelane_b32 v0, s43, 7 -; GFX900-NEXT: v_writelane_b32 v0, s44, 8 -; GFX900-NEXT: v_writelane_b32 v0, s45, 9 -; GFX900-NEXT: v_writelane_b32 v0, s46, 10 -; GFX900-NEXT: v_writelane_b32 v0, s47, 11 -; GFX900-NEXT: v_writelane_b32 v0, s48, 12 -; GFX900-NEXT: v_writelane_b32 v0, s49, 13 -; GFX900-NEXT: v_writelane_b32 v0, s50, 14 -; GFX900-NEXT: v_writelane_b32 v0, s51, 15 +; GFX900-NEXT: v_writelane_b32 v0, s46, 2 +; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s48, 4 +; GFX900-NEXT: v_writelane_b32 v0, s49, 5 +; GFX900-NEXT: v_writelane_b32 v0, s50, 6 +; GFX900-NEXT: v_writelane_b32 v0, s51, 7 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[36:51] ; GFX900-NEXT: ;;#ASMEND @@ -30232,20 +29360,12 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[8:11] ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s51, v0, 15 -; GFX900-NEXT: v_readlane_b32 s50, v0, 14 -; GFX900-NEXT: v_readlane_b32 s49, v0, 13 -; GFX900-NEXT: v_readlane_b32 s48, v0, 12 -; GFX900-NEXT: v_readlane_b32 s47, v0, 11 -; GFX900-NEXT: v_readlane_b32 s46, v0, 10 -; GFX900-NEXT: v_readlane_b32 s45, v0, 9 -; GFX900-NEXT: v_readlane_b32 s44, v0, 8 -; GFX900-NEXT: v_readlane_b32 s43, v0, 7 -; GFX900-NEXT: v_readlane_b32 s42, v0, 6 -; GFX900-NEXT: v_readlane_b32 s41, v0, 5 -; GFX900-NEXT: v_readlane_b32 s40, v0, 4 -; GFX900-NEXT: v_readlane_b32 s39, v0, 3 -; GFX900-NEXT: v_readlane_b32 s38, v0, 2 +; GFX900-NEXT: v_readlane_b32 s51, v0, 7 +; GFX900-NEXT: v_readlane_b32 s50, v0, 6 +; GFX900-NEXT: v_readlane_b32 s49, v0, 5 +; GFX900-NEXT: v_readlane_b32 s48, v0, 4 +; GFX900-NEXT: v_readlane_b32 s47, v0, 3 +; GFX900-NEXT: v_readlane_b32 s46, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -30262,20 +29382,12 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 -; GFX90A-NEXT: v_writelane_b32 v0, s40, 4 -; GFX90A-NEXT: v_writelane_b32 v0, s41, 5 -; GFX90A-NEXT: v_writelane_b32 v0, s42, 6 -; GFX90A-NEXT: v_writelane_b32 v0, s43, 7 -; GFX90A-NEXT: v_writelane_b32 v0, s44, 8 -; GFX90A-NEXT: v_writelane_b32 v0, s45, 9 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 10 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 11 -; GFX90A-NEXT: v_writelane_b32 v0, s48, 12 -; GFX90A-NEXT: v_writelane_b32 v0, s49, 13 -; GFX90A-NEXT: v_writelane_b32 v0, s50, 14 -; GFX90A-NEXT: v_writelane_b32 v0, s51, 15 +; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 +; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 +; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 +; GFX90A-NEXT: v_writelane_b32 v0, s51, 7 ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; def s[36:51] ; GFX90A-NEXT: ;;#ASMEND @@ -30289,20 +29401,12 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX90A-NEXT: ;;#ASMSTART ; GFX90A-NEXT: ; use s[8:11] ; GFX90A-NEXT: ;;#ASMEND -; GFX90A-NEXT: v_readlane_b32 s51, v0, 15 -; GFX90A-NEXT: v_readlane_b32 s50, v0, 14 -; GFX90A-NEXT: v_readlane_b32 s49, v0, 13 -; GFX90A-NEXT: v_readlane_b32 s48, v0, 12 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 11 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 10 -; GFX90A-NEXT: v_readlane_b32 s45, v0, 9 -; GFX90A-NEXT: v_readlane_b32 s44, v0, 8 -; GFX90A-NEXT: v_readlane_b32 s43, v0, 7 -; GFX90A-NEXT: v_readlane_b32 s42, v0, 6 -; GFX90A-NEXT: v_readlane_b32 s41, v0, 5 -; GFX90A-NEXT: v_readlane_b32 s40, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s51, v0, 7 +; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 +; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 +; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 +; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index b57adfe7d9306..3447cd161c653 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -610,42 +610,42 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; FIJI-NEXT: v_writelane_b32 v40, s35, 3 ; FIJI-NEXT: v_writelane_b32 v40, s36, 4 ; FIJI-NEXT: v_writelane_b32 v40, s37, 5 -; FIJI-NEXT: v_writelane_b32 v40, s38, 6 -; FIJI-NEXT: v_writelane_b32 v40, s39, 7 -; FIJI-NEXT: v_writelane_b32 v40, s40, 8 -; FIJI-NEXT: v_writelane_b32 v40, s41, 9 -; FIJI-NEXT: v_writelane_b32 v40, s42, 10 -; FIJI-NEXT: v_writelane_b32 v40, s43, 11 -; FIJI-NEXT: v_writelane_b32 v40, s44, 12 -; FIJI-NEXT: v_writelane_b32 v40, s45, 13 -; FIJI-NEXT: v_writelane_b32 v40, s46, 14 -; FIJI-NEXT: v_writelane_b32 v40, s47, 15 -; FIJI-NEXT: v_writelane_b32 v40, s48, 16 -; FIJI-NEXT: s_mov_b32 s42, s15 -; FIJI-NEXT: s_mov_b32 s43, s14 -; FIJI-NEXT: s_mov_b32 s44, s13 -; FIJI-NEXT: s_mov_b32 s45, s12 +; FIJI-NEXT: v_writelane_b32 v40, s46, 6 +; FIJI-NEXT: v_writelane_b32 v40, s47, 7 +; FIJI-NEXT: v_writelane_b32 v40, s48, 8 +; FIJI-NEXT: v_writelane_b32 v40, s49, 9 +; FIJI-NEXT: v_writelane_b32 v40, s50, 10 +; FIJI-NEXT: v_writelane_b32 v40, s51, 11 +; FIJI-NEXT: v_writelane_b32 v40, s52, 12 +; FIJI-NEXT: v_writelane_b32 v40, s53, 13 +; FIJI-NEXT: v_writelane_b32 v40, s62, 14 +; FIJI-NEXT: v_writelane_b32 v40, s63, 15 +; FIJI-NEXT: v_writelane_b32 v40, s64, 16 +; FIJI-NEXT: s_mov_b32 s50, s15 +; FIJI-NEXT: s_mov_b32 s51, s14 +; FIJI-NEXT: s_mov_b32 s52, s13 +; FIJI-NEXT: s_mov_b32 s53, s12 ; FIJI-NEXT: s_mov_b64 s[34:35], s[10:11] ; FIJI-NEXT: s_mov_b64 s[36:37], s[8:9] -; FIJI-NEXT: s_mov_b64 s[38:39], s[6:7] -; FIJI-NEXT: s_mov_b64 s[40:41], s[4:5] +; FIJI-NEXT: s_mov_b64 s[46:47], s[6:7] +; FIJI-NEXT: s_mov_b64 s[48:49], s[4:5] ; FIJI-NEXT: v_add_u32_e32 v3, vcc, v3, v4 -; FIJI-NEXT: s_mov_b64 s[46:47], exec +; FIJI-NEXT: s_mov_b64 s[62:63], exec ; FIJI-NEXT: s_addk_i32 s32, 0x400 -; FIJI-NEXT: v_writelane_b32 v40, s49, 17 +; FIJI-NEXT: v_writelane_b32 v40, s65, 17 ; FIJI-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 ; FIJI-NEXT: v_readfirstlane_b32 s16, v0 ; FIJI-NEXT: v_readfirstlane_b32 s17, v1 ; FIJI-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; FIJI-NEXT: s_and_saveexec_b64 s[48:49], vcc -; FIJI-NEXT: s_mov_b64 s[4:5], s[40:41] -; FIJI-NEXT: s_mov_b64 s[6:7], s[38:39] +; FIJI-NEXT: s_and_saveexec_b64 s[64:65], vcc +; FIJI-NEXT: s_mov_b64 s[4:5], s[48:49] +; FIJI-NEXT: s_mov_b64 s[6:7], s[46:47] ; FIJI-NEXT: s_mov_b64 s[8:9], s[36:37] ; FIJI-NEXT: s_mov_b64 s[10:11], s[34:35] -; FIJI-NEXT: s_mov_b32 s12, s45 -; FIJI-NEXT: s_mov_b32 s13, s44 -; FIJI-NEXT: s_mov_b32 s14, s43 -; FIJI-NEXT: s_mov_b32 s15, s42 +; FIJI-NEXT: s_mov_b32 s12, s53 +; FIJI-NEXT: s_mov_b32 s13, s52 +; FIJI-NEXT: s_mov_b32 s14, s51 +; FIJI-NEXT: s_mov_b32 s15, s50 ; FIJI-NEXT: v_mov_b32_e32 v0, v2 ; FIJI-NEXT: v_mov_b32_e32 v1, v3 ; FIJI-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -654,23 +654,23 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; FIJI-NEXT: ; implicit-def: $vgpr31 ; FIJI-NEXT: ; implicit-def: $vgpr2 ; FIJI-NEXT: ; implicit-def: $vgpr3 -; FIJI-NEXT: s_xor_b64 exec, exec, s[48:49] +; FIJI-NEXT: s_xor_b64 exec, exec, s[64:65] ; FIJI-NEXT: s_cbranch_execnz .LBB18_1 ; FIJI-NEXT: ; %bb.2: -; FIJI-NEXT: s_mov_b64 exec, s[46:47] +; FIJI-NEXT: s_mov_b64 exec, s[62:63] ; FIJI-NEXT: v_mov_b32_e32 v0, v4 -; FIJI-NEXT: v_readlane_b32 s49, v40, 17 -; FIJI-NEXT: v_readlane_b32 s48, v40, 16 -; FIJI-NEXT: v_readlane_b32 s47, v40, 15 -; FIJI-NEXT: v_readlane_b32 s46, v40, 14 -; FIJI-NEXT: v_readlane_b32 s45, v40, 13 -; FIJI-NEXT: v_readlane_b32 s44, v40, 12 -; FIJI-NEXT: v_readlane_b32 s43, v40, 11 -; FIJI-NEXT: v_readlane_b32 s42, v40, 10 -; FIJI-NEXT: v_readlane_b32 s41, v40, 9 -; FIJI-NEXT: v_readlane_b32 s40, v40, 8 -; FIJI-NEXT: v_readlane_b32 s39, v40, 7 -; FIJI-NEXT: v_readlane_b32 s38, v40, 6 +; FIJI-NEXT: v_readlane_b32 s65, v40, 17 +; FIJI-NEXT: v_readlane_b32 s64, v40, 16 +; FIJI-NEXT: v_readlane_b32 s63, v40, 15 +; FIJI-NEXT: v_readlane_b32 s62, v40, 14 +; FIJI-NEXT: v_readlane_b32 s53, v40, 13 +; FIJI-NEXT: v_readlane_b32 s52, v40, 12 +; FIJI-NEXT: v_readlane_b32 s51, v40, 11 +; FIJI-NEXT: v_readlane_b32 s50, v40, 10 +; FIJI-NEXT: v_readlane_b32 s49, v40, 9 +; FIJI-NEXT: v_readlane_b32 s48, v40, 8 +; FIJI-NEXT: v_readlane_b32 s47, v40, 7 +; FIJI-NEXT: v_readlane_b32 s46, v40, 6 ; FIJI-NEXT: v_readlane_b32 s37, v40, 5 ; FIJI-NEXT: v_readlane_b32 s36, v40, 4 ; FIJI-NEXT: v_readlane_b32 s35, v40, 3 @@ -701,42 +701,42 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; HAWAII-NEXT: v_writelane_b32 v40, s35, 3 ; HAWAII-NEXT: v_writelane_b32 v40, s36, 4 ; HAWAII-NEXT: v_writelane_b32 v40, s37, 5 -; HAWAII-NEXT: v_writelane_b32 v40, s38, 6 -; HAWAII-NEXT: v_writelane_b32 v40, s39, 7 -; HAWAII-NEXT: v_writelane_b32 v40, s40, 8 -; HAWAII-NEXT: v_writelane_b32 v40, s41, 9 -; HAWAII-NEXT: v_writelane_b32 v40, s42, 10 -; HAWAII-NEXT: v_writelane_b32 v40, s43, 11 -; HAWAII-NEXT: v_writelane_b32 v40, s44, 12 -; HAWAII-NEXT: v_writelane_b32 v40, s45, 13 -; HAWAII-NEXT: v_writelane_b32 v40, s46, 14 -; HAWAII-NEXT: v_writelane_b32 v40, s47, 15 -; HAWAII-NEXT: v_writelane_b32 v40, s48, 16 -; HAWAII-NEXT: s_mov_b32 s42, s15 -; HAWAII-NEXT: s_mov_b32 s43, s14 -; HAWAII-NEXT: s_mov_b32 s44, s13 -; HAWAII-NEXT: s_mov_b32 s45, s12 +; HAWAII-NEXT: v_writelane_b32 v40, s46, 6 +; HAWAII-NEXT: v_writelane_b32 v40, s47, 7 +; HAWAII-NEXT: v_writelane_b32 v40, s48, 8 +; HAWAII-NEXT: v_writelane_b32 v40, s49, 9 +; HAWAII-NEXT: v_writelane_b32 v40, s50, 10 +; HAWAII-NEXT: v_writelane_b32 v40, s51, 11 +; HAWAII-NEXT: v_writelane_b32 v40, s52, 12 +; HAWAII-NEXT: v_writelane_b32 v40, s53, 13 +; HAWAII-NEXT: v_writelane_b32 v40, s62, 14 +; HAWAII-NEXT: v_writelane_b32 v40, s63, 15 +; HAWAII-NEXT: v_writelane_b32 v40, s64, 16 +; HAWAII-NEXT: s_mov_b32 s50, s15 +; HAWAII-NEXT: s_mov_b32 s51, s14 +; HAWAII-NEXT: s_mov_b32 s52, s13 +; HAWAII-NEXT: s_mov_b32 s53, s12 ; HAWAII-NEXT: s_mov_b64 s[34:35], s[10:11] ; HAWAII-NEXT: s_mov_b64 s[36:37], s[8:9] -; HAWAII-NEXT: s_mov_b64 s[38:39], s[6:7] -; HAWAII-NEXT: s_mov_b64 s[40:41], s[4:5] +; HAWAII-NEXT: s_mov_b64 s[46:47], s[6:7] +; HAWAII-NEXT: s_mov_b64 s[48:49], s[4:5] ; HAWAII-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; HAWAII-NEXT: s_mov_b64 s[46:47], exec +; HAWAII-NEXT: s_mov_b64 s[62:63], exec ; HAWAII-NEXT: s_addk_i32 s32, 0x400 -; HAWAII-NEXT: v_writelane_b32 v40, s49, 17 +; HAWAII-NEXT: v_writelane_b32 v40, s65, 17 ; HAWAII-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 ; HAWAII-NEXT: v_readfirstlane_b32 s16, v0 ; HAWAII-NEXT: v_readfirstlane_b32 s17, v1 ; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; HAWAII-NEXT: s_and_saveexec_b64 s[48:49], vcc -; HAWAII-NEXT: s_mov_b64 s[4:5], s[40:41] -; HAWAII-NEXT: s_mov_b64 s[6:7], s[38:39] +; HAWAII-NEXT: s_and_saveexec_b64 s[64:65], vcc +; HAWAII-NEXT: s_mov_b64 s[4:5], s[48:49] +; HAWAII-NEXT: s_mov_b64 s[6:7], s[46:47] ; HAWAII-NEXT: s_mov_b64 s[8:9], s[36:37] ; HAWAII-NEXT: s_mov_b64 s[10:11], s[34:35] -; HAWAII-NEXT: s_mov_b32 s12, s45 -; HAWAII-NEXT: s_mov_b32 s13, s44 -; HAWAII-NEXT: s_mov_b32 s14, s43 -; HAWAII-NEXT: s_mov_b32 s15, s42 +; HAWAII-NEXT: s_mov_b32 s12, s53 +; HAWAII-NEXT: s_mov_b32 s13, s52 +; HAWAII-NEXT: s_mov_b32 s14, s51 +; HAWAII-NEXT: s_mov_b32 s15, s50 ; HAWAII-NEXT: v_mov_b32_e32 v0, v2 ; HAWAII-NEXT: v_mov_b32_e32 v1, v3 ; HAWAII-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -745,23 +745,23 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; HAWAII-NEXT: ; implicit-def: $vgpr31 ; HAWAII-NEXT: ; implicit-def: $vgpr2 ; HAWAII-NEXT: ; implicit-def: $vgpr3 -; HAWAII-NEXT: s_xor_b64 exec, exec, s[48:49] +; HAWAII-NEXT: s_xor_b64 exec, exec, s[64:65] ; HAWAII-NEXT: s_cbranch_execnz .LBB18_1 ; HAWAII-NEXT: ; %bb.2: -; HAWAII-NEXT: s_mov_b64 exec, s[46:47] +; HAWAII-NEXT: s_mov_b64 exec, s[62:63] ; HAWAII-NEXT: v_mov_b32_e32 v0, v4 -; HAWAII-NEXT: v_readlane_b32 s49, v40, 17 -; HAWAII-NEXT: v_readlane_b32 s48, v40, 16 -; HAWAII-NEXT: v_readlane_b32 s47, v40, 15 -; HAWAII-NEXT: v_readlane_b32 s46, v40, 14 -; HAWAII-NEXT: v_readlane_b32 s45, v40, 13 -; HAWAII-NEXT: v_readlane_b32 s44, v40, 12 -; HAWAII-NEXT: v_readlane_b32 s43, v40, 11 -; HAWAII-NEXT: v_readlane_b32 s42, v40, 10 -; HAWAII-NEXT: v_readlane_b32 s41, v40, 9 -; HAWAII-NEXT: v_readlane_b32 s40, v40, 8 -; HAWAII-NEXT: v_readlane_b32 s39, v40, 7 -; HAWAII-NEXT: v_readlane_b32 s38, v40, 6 +; HAWAII-NEXT: v_readlane_b32 s65, v40, 17 +; HAWAII-NEXT: v_readlane_b32 s64, v40, 16 +; HAWAII-NEXT: v_readlane_b32 s63, v40, 15 +; HAWAII-NEXT: v_readlane_b32 s62, v40, 14 +; HAWAII-NEXT: v_readlane_b32 s53, v40, 13 +; HAWAII-NEXT: v_readlane_b32 s52, v40, 12 +; HAWAII-NEXT: v_readlane_b32 s51, v40, 11 +; HAWAII-NEXT: v_readlane_b32 s50, v40, 10 +; HAWAII-NEXT: v_readlane_b32 s49, v40, 9 +; HAWAII-NEXT: v_readlane_b32 s48, v40, 8 +; HAWAII-NEXT: v_readlane_b32 s47, v40, 7 +; HAWAII-NEXT: v_readlane_b32 s46, v40, 6 ; HAWAII-NEXT: v_readlane_b32 s37, v40, 5 ; HAWAII-NEXT: v_readlane_b32 s36, v40, 4 ; HAWAII-NEXT: v_readlane_b32 s35, v40, 3 @@ -792,42 +792,42 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; GFX9-NEXT: v_writelane_b32 v40, s35, 3 ; GFX9-NEXT: v_writelane_b32 v40, s36, 4 ; GFX9-NEXT: v_writelane_b32 v40, s37, 5 -; GFX9-NEXT: v_writelane_b32 v40, s38, 6 -; GFX9-NEXT: v_writelane_b32 v40, s39, 7 -; GFX9-NEXT: v_writelane_b32 v40, s40, 8 -; GFX9-NEXT: v_writelane_b32 v40, s41, 9 -; GFX9-NEXT: v_writelane_b32 v40, s42, 10 -; GFX9-NEXT: v_writelane_b32 v40, s43, 11 -; GFX9-NEXT: v_writelane_b32 v40, s44, 12 -; GFX9-NEXT: v_writelane_b32 v40, s45, 13 -; GFX9-NEXT: v_writelane_b32 v40, s46, 14 -; GFX9-NEXT: v_writelane_b32 v40, s47, 15 -; GFX9-NEXT: v_writelane_b32 v40, s48, 16 -; GFX9-NEXT: s_mov_b32 s42, s15 -; GFX9-NEXT: s_mov_b32 s43, s14 -; GFX9-NEXT: s_mov_b32 s44, s13 -; GFX9-NEXT: s_mov_b32 s45, s12 +; GFX9-NEXT: v_writelane_b32 v40, s46, 6 +; GFX9-NEXT: v_writelane_b32 v40, s47, 7 +; GFX9-NEXT: v_writelane_b32 v40, s48, 8 +; GFX9-NEXT: v_writelane_b32 v40, s49, 9 +; GFX9-NEXT: v_writelane_b32 v40, s50, 10 +; GFX9-NEXT: v_writelane_b32 v40, s51, 11 +; GFX9-NEXT: v_writelane_b32 v40, s52, 12 +; GFX9-NEXT: v_writelane_b32 v40, s53, 13 +; GFX9-NEXT: v_writelane_b32 v40, s62, 14 +; GFX9-NEXT: v_writelane_b32 v40, s63, 15 +; GFX9-NEXT: v_writelane_b32 v40, s64, 16 +; GFX9-NEXT: s_mov_b32 s50, s15 +; GFX9-NEXT: s_mov_b32 s51, s14 +; GFX9-NEXT: s_mov_b32 s52, s13 +; GFX9-NEXT: s_mov_b32 s53, s12 ; GFX9-NEXT: s_mov_b64 s[34:35], s[10:11] ; GFX9-NEXT: s_mov_b64 s[36:37], s[8:9] -; GFX9-NEXT: s_mov_b64 s[38:39], s[6:7] -; GFX9-NEXT: s_mov_b64 s[40:41], s[4:5] +; GFX9-NEXT: s_mov_b64 s[46:47], s[6:7] +; GFX9-NEXT: s_mov_b64 s[48:49], s[4:5] ; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 -; GFX9-NEXT: s_mov_b64 s[46:47], exec +; GFX9-NEXT: s_mov_b64 s[62:63], exec ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s49, 17 +; GFX9-NEXT: v_writelane_b32 v40, s65, 17 ; GFX9-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_readfirstlane_b32 s16, v0 ; GFX9-NEXT: v_readfirstlane_b32 s17, v1 ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] -; GFX9-NEXT: s_and_saveexec_b64 s[48:49], vcc -; GFX9-NEXT: s_mov_b64 s[4:5], s[40:41] -; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] +; GFX9-NEXT: s_and_saveexec_b64 s[64:65], vcc +; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] +; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] ; GFX9-NEXT: s_mov_b64 s[8:9], s[36:37] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] -; GFX9-NEXT: s_mov_b32 s12, s45 -; GFX9-NEXT: s_mov_b32 s13, s44 -; GFX9-NEXT: s_mov_b32 s14, s43 -; GFX9-NEXT: s_mov_b32 s15, s42 +; GFX9-NEXT: s_mov_b32 s12, s53 +; GFX9-NEXT: s_mov_b32 s13, s52 +; GFX9-NEXT: s_mov_b32 s14, s51 +; GFX9-NEXT: s_mov_b32 s15, s50 ; GFX9-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -836,23 +836,23 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; GFX9-NEXT: ; implicit-def: $vgpr31 ; GFX9-NEXT: ; implicit-def: $vgpr2 ; GFX9-NEXT: ; implicit-def: $vgpr3 -; GFX9-NEXT: s_xor_b64 exec, exec, s[48:49] +; GFX9-NEXT: s_xor_b64 exec, exec, s[64:65] ; GFX9-NEXT: s_cbranch_execnz .LBB18_1 ; GFX9-NEXT: ; %bb.2: -; GFX9-NEXT: s_mov_b64 exec, s[46:47] +; GFX9-NEXT: s_mov_b64 exec, s[62:63] ; GFX9-NEXT: v_mov_b32_e32 v0, v4 -; GFX9-NEXT: v_readlane_b32 s49, v40, 17 -; GFX9-NEXT: v_readlane_b32 s48, v40, 16 -; GFX9-NEXT: v_readlane_b32 s47, v40, 15 -; GFX9-NEXT: v_readlane_b32 s46, v40, 14 -; GFX9-NEXT: v_readlane_b32 s45, v40, 13 -; GFX9-NEXT: v_readlane_b32 s44, v40, 12 -; GFX9-NEXT: v_readlane_b32 s43, v40, 11 -; GFX9-NEXT: v_readlane_b32 s42, v40, 10 -; GFX9-NEXT: v_readlane_b32 s41, v40, 9 -; GFX9-NEXT: v_readlane_b32 s40, v40, 8 -; GFX9-NEXT: v_readlane_b32 s39, v40, 7 -; GFX9-NEXT: v_readlane_b32 s38, v40, 6 +; GFX9-NEXT: v_readlane_b32 s65, v40, 17 +; GFX9-NEXT: v_readlane_b32 s64, v40, 16 +; GFX9-NEXT: v_readlane_b32 s63, v40, 15 +; GFX9-NEXT: v_readlane_b32 s62, v40, 14 +; GFX9-NEXT: v_readlane_b32 s53, v40, 13 +; GFX9-NEXT: v_readlane_b32 s52, v40, 12 +; GFX9-NEXT: v_readlane_b32 s51, v40, 11 +; GFX9-NEXT: v_readlane_b32 s50, v40, 10 +; GFX9-NEXT: v_readlane_b32 s49, v40, 9 +; GFX9-NEXT: v_readlane_b32 s48, v40, 8 +; GFX9-NEXT: v_readlane_b32 s47, v40, 7 +; GFX9-NEXT: v_readlane_b32 s46, v40, 6 ; GFX9-NEXT: v_readlane_b32 s37, v40, 5 ; GFX9-NEXT: v_readlane_b32 s36, v40, 4 ; GFX9-NEXT: v_readlane_b32 s35, v40, 3 diff --git a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir index 080bd052a7391..6b5c624356f47 100644 --- a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir +++ b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir @@ -36,52 +36,73 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr34_sgpr35 = IMPLICIT_DEF ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: renamable $sgpr41 = IMPLICIT_DEF - ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY undef $sgpr8_sgpr9 + ; CHECK-NEXT: renamable $sgpr49 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY undef $sgpr8_sgpr9 ; CHECK-NEXT: renamable $sgpr36_sgpr37 = IMPLICIT_DEF - ; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4) - ; CHECK-NEXT: dead renamable $sgpr4 = S_LOAD_DWORD_IMM renamable $sgpr38_sgpr39, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr44_sgpr45, 0, 0 :: (invariant load (s64), align 16, addrspace 4) + ; CHECK-NEXT: renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX8_IMM renamable $sgpr46_sgpr47, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4) + ; CHECK-NEXT: dead renamable $sgpr4 = S_LOAD_DWORD_IMM renamable $sgpr46_sgpr47, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4) + ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 0, 0 :: (invariant load (s64), align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: $vgpr1 = COPY renamable $sgpr51 + ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY renamable $sgpr14_sgpr15 + ; CHECK-NEXT: renamable $sgpr9 = COPY renamable $sgpr13 + ; CHECK-NEXT: renamable $vgpr23 = IMPLICIT_DEF + ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr23, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, killed $vgpr23 + ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, killed $vgpr23 + ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, killed $vgpr23 + ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, killed $vgpr23 + ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, killed $vgpr23 + ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, killed $vgpr23 + ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, killed $vgpr23, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr23, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: $vgpr1 = COPY killed renamable $sgpr15 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: $vcc = COPY renamable $sgpr40_sgpr41 + ; CHECK-NEXT: $vcc = COPY renamable $sgpr48_sgpr49 ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef $vcc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 + ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) + ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr46_sgpr47, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 + ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) + ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr46_sgpr47, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) ; CHECK-NEXT: S_CMP_LG_U64 renamable $sgpr4_sgpr5, 0, implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) - ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 + ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.5, implicit undef $vcc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) - ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 + ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CMP_EQ_U32 renamable $sgpr8, 0, implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FC00 + ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr38_sgpr39, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4) + ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr46_sgpr47, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4) ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], renamable $sgpr50_sgpr51, 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr49 + ; CHECK-NEXT: renamable $vgpr23 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 + ; CHECK-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 1 + ; CHECK-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 2 + ; CHECK-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 3 + ; CHECK-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 4 + ; CHECK-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 5 + ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 6 + ; CHECK-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR killed $vgpr23, 7 + ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], renamable $sgpr10_sgpr11, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr9 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr36_sgpr37 ; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr34_sgpr35 diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir index dff2bd7f7aef9..adaef348a0388 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir @@ -53,7 +53,7 @@ body: | bb.0: liveins: $sgpr30_sgpr31, $sgpr10, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN-LABEL: name: sgpr_spill_lane_crossover - ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr94, $sgpr95, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr64, 0, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr65, 1, $vgpr63 @@ -61,32 +61,16 @@ body: | ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr67, 3, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr68, 4, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr69, 5, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr70, 6, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr71, 7, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr72, 8, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr73, 9, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr74, 10, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr75, 11, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr76, 12, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr77, 13, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr78, 14, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr79, 15, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr80, 16, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr81, 17, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr82, 18, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr83, 19, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr84, 20, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr85, 21, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr86, 22, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr87, 23, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr88, 24, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr89, 25, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr90, 26, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr91, 27, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr92, 28, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr93, 29, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr94, 30, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 31, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr78, 6, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr79, 7, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr80, 8, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr81, 9, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr82, 10, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr83, 11, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr84, 12, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr85, 13, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr94, 14, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 15, $vgpr63 ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]] diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir index 9b0f52cb39b01..fcd835c7f09da 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir @@ -56,20 +56,15 @@ body: | bb.0: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr30_sgpr31, $vcc, $vgpr0 ; GCN-LABEL: name: spill_exec_copy_reserved_reg - ; GCN: liveins: $vcc, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $vgpr0, $vgpr2, $sgpr30_sgpr31 + ; GCN: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr30_sgpr31, $vcc, $vgpr0 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr28_sgpr29 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr28_sgpr29 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 0, undef $vgpr2 - ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr35, 1, undef $vgpr2 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, killed $vgpr0 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, killed $vgpr0 - ; GCN-NEXT: $sgpr34_sgpr35 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $sgpr38_sgpr39 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: $sgpr28_sgpr29 = IMPLICIT_DEF ; GCN-NEXT: $vgpr1 = COPY $vgpr0 ; GCN-NEXT: S_NOP 0, implicit $sgpr28_sgpr29 - ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr34_sgpr35 + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr38_sgpr39 ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0 ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 1 ; GCN-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8_sgpr9_sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr15, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $vcc diff --git a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll index 0d6bccad89d82..f7ea8109beea4 100644 --- a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll @@ -7,149 +7,83 @@ define void @spill_more_than_wavesize_csr_sgprs() { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: v_writelane_b32 v0, s35, 0 ; CHECK-NEXT: v_writelane_b32 v0, s36, 1 ; CHECK-NEXT: v_writelane_b32 v0, s37, 2 -; CHECK-NEXT: v_writelane_b32 v0, s38, 3 -; CHECK-NEXT: v_writelane_b32 v0, s39, 4 -; CHECK-NEXT: v_writelane_b32 v0, s40, 5 -; CHECK-NEXT: v_writelane_b32 v0, s41, 6 -; CHECK-NEXT: v_writelane_b32 v0, s42, 7 -; CHECK-NEXT: v_writelane_b32 v0, s43, 8 -; CHECK-NEXT: v_writelane_b32 v0, s44, 9 -; CHECK-NEXT: v_writelane_b32 v0, s45, 10 -; CHECK-NEXT: v_writelane_b32 v0, s46, 11 -; CHECK-NEXT: v_writelane_b32 v0, s47, 12 -; CHECK-NEXT: v_writelane_b32 v0, s48, 13 -; CHECK-NEXT: v_writelane_b32 v0, s49, 14 -; CHECK-NEXT: v_writelane_b32 v0, s50, 15 -; CHECK-NEXT: v_writelane_b32 v0, s51, 16 -; CHECK-NEXT: v_writelane_b32 v0, s52, 17 -; CHECK-NEXT: v_writelane_b32 v0, s53, 18 -; CHECK-NEXT: v_writelane_b32 v0, s54, 19 -; CHECK-NEXT: v_writelane_b32 v0, s55, 20 -; CHECK-NEXT: v_writelane_b32 v0, s56, 21 -; CHECK-NEXT: v_writelane_b32 v0, s57, 22 -; CHECK-NEXT: v_writelane_b32 v0, s58, 23 -; CHECK-NEXT: v_writelane_b32 v0, s59, 24 -; CHECK-NEXT: v_writelane_b32 v0, s60, 25 -; CHECK-NEXT: v_writelane_b32 v0, s61, 26 -; CHECK-NEXT: v_writelane_b32 v0, s62, 27 -; CHECK-NEXT: v_writelane_b32 v0, s63, 28 -; CHECK-NEXT: v_writelane_b32 v0, s64, 29 -; CHECK-NEXT: v_writelane_b32 v0, s65, 30 -; CHECK-NEXT: v_writelane_b32 v0, s66, 31 -; CHECK-NEXT: v_writelane_b32 v0, s67, 32 -; CHECK-NEXT: v_writelane_b32 v0, s68, 33 -; CHECK-NEXT: v_writelane_b32 v0, s69, 34 -; CHECK-NEXT: v_writelane_b32 v0, s70, 35 -; CHECK-NEXT: v_writelane_b32 v0, s71, 36 -; CHECK-NEXT: v_writelane_b32 v0, s72, 37 -; CHECK-NEXT: v_writelane_b32 v0, s73, 38 -; CHECK-NEXT: v_writelane_b32 v0, s74, 39 -; CHECK-NEXT: v_writelane_b32 v0, s75, 40 -; CHECK-NEXT: v_writelane_b32 v0, s76, 41 -; CHECK-NEXT: v_writelane_b32 v0, s77, 42 -; CHECK-NEXT: v_writelane_b32 v0, s78, 43 -; CHECK-NEXT: v_writelane_b32 v0, s79, 44 -; CHECK-NEXT: v_writelane_b32 v0, s80, 45 -; CHECK-NEXT: v_writelane_b32 v0, s81, 46 -; CHECK-NEXT: v_writelane_b32 v0, s82, 47 -; CHECK-NEXT: v_writelane_b32 v0, s83, 48 -; CHECK-NEXT: v_writelane_b32 v0, s84, 49 -; CHECK-NEXT: v_writelane_b32 v0, s85, 50 -; CHECK-NEXT: v_writelane_b32 v0, s86, 51 -; CHECK-NEXT: v_writelane_b32 v0, s87, 52 -; CHECK-NEXT: v_writelane_b32 v0, s88, 53 -; CHECK-NEXT: v_writelane_b32 v0, s89, 54 -; CHECK-NEXT: v_writelane_b32 v0, s90, 55 -; CHECK-NEXT: v_writelane_b32 v0, s91, 56 -; CHECK-NEXT: v_writelane_b32 v0, s92, 57 -; CHECK-NEXT: v_writelane_b32 v0, s93, 58 -; CHECK-NEXT: v_writelane_b32 v0, s94, 59 -; CHECK-NEXT: v_writelane_b32 v0, s95, 60 -; CHECK-NEXT: v_writelane_b32 v1, s99, 0 -; CHECK-NEXT: v_writelane_b32 v0, s96, 61 -; CHECK-NEXT: v_writelane_b32 v1, s100, 1 -; CHECK-NEXT: v_writelane_b32 v0, s97, 62 -; CHECK-NEXT: v_writelane_b32 v1, s101, 2 -; CHECK-NEXT: v_writelane_b32 v0, s98, 63 -; CHECK-NEXT: v_writelane_b32 v1, s102, 3 +; CHECK-NEXT: v_writelane_b32 v0, s46, 3 +; CHECK-NEXT: v_writelane_b32 v0, s47, 4 +; CHECK-NEXT: v_writelane_b32 v0, s48, 5 +; CHECK-NEXT: v_writelane_b32 v0, s49, 6 +; CHECK-NEXT: v_writelane_b32 v0, s50, 7 +; CHECK-NEXT: v_writelane_b32 v0, s51, 8 +; CHECK-NEXT: v_writelane_b32 v0, s52, 9 +; CHECK-NEXT: v_writelane_b32 v0, s53, 10 +; CHECK-NEXT: v_writelane_b32 v0, s62, 11 +; CHECK-NEXT: v_writelane_b32 v0, s63, 12 +; CHECK-NEXT: v_writelane_b32 v0, s64, 13 +; CHECK-NEXT: v_writelane_b32 v0, s65, 14 +; CHECK-NEXT: v_writelane_b32 v0, s66, 15 +; CHECK-NEXT: v_writelane_b32 v0, s67, 16 +; CHECK-NEXT: v_writelane_b32 v0, s68, 17 +; CHECK-NEXT: v_writelane_b32 v0, s69, 18 +; CHECK-NEXT: v_writelane_b32 v0, s78, 19 +; CHECK-NEXT: v_writelane_b32 v0, s79, 20 +; CHECK-NEXT: v_writelane_b32 v0, s80, 21 +; CHECK-NEXT: v_writelane_b32 v0, s81, 22 +; CHECK-NEXT: v_writelane_b32 v0, s82, 23 +; CHECK-NEXT: v_writelane_b32 v0, s83, 24 +; CHECK-NEXT: v_writelane_b32 v0, s84, 25 +; CHECK-NEXT: v_writelane_b32 v0, s85, 26 +; CHECK-NEXT: v_writelane_b32 v0, s94, 27 +; CHECK-NEXT: v_writelane_b32 v0, s95, 28 +; CHECK-NEXT: v_writelane_b32 v0, s96, 29 +; CHECK-NEXT: v_writelane_b32 v0, s97, 30 +; CHECK-NEXT: v_writelane_b32 v0, s98, 31 +; CHECK-NEXT: v_writelane_b32 v0, s99, 32 +; CHECK-NEXT: v_writelane_b32 v0, s100, 33 +; CHECK-NEXT: v_writelane_b32 v0, s101, 34 +; CHECK-NEXT: v_writelane_b32 v0, s102, 35 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s102, v1, 3 -; CHECK-NEXT: v_readlane_b32 s101, v1, 2 -; CHECK-NEXT: v_readlane_b32 s100, v1, 1 -; CHECK-NEXT: v_readlane_b32 s99, v1, 0 -; CHECK-NEXT: v_readlane_b32 s98, v0, 63 -; CHECK-NEXT: v_readlane_b32 s97, v0, 62 -; CHECK-NEXT: v_readlane_b32 s96, v0, 61 -; CHECK-NEXT: v_readlane_b32 s95, v0, 60 -; CHECK-NEXT: v_readlane_b32 s94, v0, 59 -; CHECK-NEXT: v_readlane_b32 s93, v0, 58 -; CHECK-NEXT: v_readlane_b32 s92, v0, 57 -; CHECK-NEXT: v_readlane_b32 s91, v0, 56 -; CHECK-NEXT: v_readlane_b32 s90, v0, 55 -; CHECK-NEXT: v_readlane_b32 s89, v0, 54 -; CHECK-NEXT: v_readlane_b32 s88, v0, 53 -; CHECK-NEXT: v_readlane_b32 s87, v0, 52 -; CHECK-NEXT: v_readlane_b32 s86, v0, 51 -; CHECK-NEXT: v_readlane_b32 s85, v0, 50 -; CHECK-NEXT: v_readlane_b32 s84, v0, 49 -; CHECK-NEXT: v_readlane_b32 s83, v0, 48 -; CHECK-NEXT: v_readlane_b32 s82, v0, 47 -; CHECK-NEXT: v_readlane_b32 s81, v0, 46 -; CHECK-NEXT: v_readlane_b32 s80, v0, 45 -; CHECK-NEXT: v_readlane_b32 s79, v0, 44 -; CHECK-NEXT: v_readlane_b32 s78, v0, 43 -; CHECK-NEXT: v_readlane_b32 s77, v0, 42 -; CHECK-NEXT: v_readlane_b32 s76, v0, 41 -; CHECK-NEXT: v_readlane_b32 s75, v0, 40 -; CHECK-NEXT: v_readlane_b32 s74, v0, 39 -; CHECK-NEXT: v_readlane_b32 s73, v0, 38 -; CHECK-NEXT: v_readlane_b32 s72, v0, 37 -; CHECK-NEXT: v_readlane_b32 s71, v0, 36 -; CHECK-NEXT: v_readlane_b32 s70, v0, 35 -; CHECK-NEXT: v_readlane_b32 s69, v0, 34 -; CHECK-NEXT: v_readlane_b32 s68, v0, 33 -; CHECK-NEXT: v_readlane_b32 s67, v0, 32 -; CHECK-NEXT: v_readlane_b32 s66, v0, 31 -; CHECK-NEXT: v_readlane_b32 s65, v0, 30 -; CHECK-NEXT: v_readlane_b32 s64, v0, 29 -; CHECK-NEXT: v_readlane_b32 s63, v0, 28 -; CHECK-NEXT: v_readlane_b32 s62, v0, 27 -; CHECK-NEXT: v_readlane_b32 s61, v0, 26 -; CHECK-NEXT: v_readlane_b32 s60, v0, 25 -; CHECK-NEXT: v_readlane_b32 s59, v0, 24 -; CHECK-NEXT: v_readlane_b32 s58, v0, 23 -; CHECK-NEXT: v_readlane_b32 s57, v0, 22 -; CHECK-NEXT: v_readlane_b32 s56, v0, 21 -; CHECK-NEXT: v_readlane_b32 s55, v0, 20 -; CHECK-NEXT: v_readlane_b32 s54, v0, 19 -; CHECK-NEXT: v_readlane_b32 s53, v0, 18 -; CHECK-NEXT: v_readlane_b32 s52, v0, 17 -; CHECK-NEXT: v_readlane_b32 s51, v0, 16 -; CHECK-NEXT: v_readlane_b32 s50, v0, 15 -; CHECK-NEXT: v_readlane_b32 s49, v0, 14 -; CHECK-NEXT: v_readlane_b32 s48, v0, 13 -; CHECK-NEXT: v_readlane_b32 s47, v0, 12 -; CHECK-NEXT: v_readlane_b32 s46, v0, 11 -; CHECK-NEXT: v_readlane_b32 s45, v0, 10 -; CHECK-NEXT: v_readlane_b32 s44, v0, 9 -; CHECK-NEXT: v_readlane_b32 s43, v0, 8 -; CHECK-NEXT: v_readlane_b32 s42, v0, 7 -; CHECK-NEXT: v_readlane_b32 s41, v0, 6 -; CHECK-NEXT: v_readlane_b32 s40, v0, 5 -; CHECK-NEXT: v_readlane_b32 s39, v0, 4 -; CHECK-NEXT: v_readlane_b32 s38, v0, 3 +; CHECK-NEXT: v_readlane_b32 s102, v0, 35 +; CHECK-NEXT: v_readlane_b32 s101, v0, 34 +; CHECK-NEXT: v_readlane_b32 s100, v0, 33 +; CHECK-NEXT: v_readlane_b32 s99, v0, 32 +; CHECK-NEXT: v_readlane_b32 s98, v0, 31 +; CHECK-NEXT: v_readlane_b32 s97, v0, 30 +; CHECK-NEXT: v_readlane_b32 s96, v0, 29 +; CHECK-NEXT: v_readlane_b32 s95, v0, 28 +; CHECK-NEXT: v_readlane_b32 s94, v0, 27 +; CHECK-NEXT: v_readlane_b32 s85, v0, 26 +; CHECK-NEXT: v_readlane_b32 s84, v0, 25 +; CHECK-NEXT: v_readlane_b32 s83, v0, 24 +; CHECK-NEXT: v_readlane_b32 s82, v0, 23 +; CHECK-NEXT: v_readlane_b32 s81, v0, 22 +; CHECK-NEXT: v_readlane_b32 s80, v0, 21 +; CHECK-NEXT: v_readlane_b32 s79, v0, 20 +; CHECK-NEXT: v_readlane_b32 s78, v0, 19 +; CHECK-NEXT: v_readlane_b32 s69, v0, 18 +; CHECK-NEXT: v_readlane_b32 s68, v0, 17 +; CHECK-NEXT: v_readlane_b32 s67, v0, 16 +; CHECK-NEXT: v_readlane_b32 s66, v0, 15 +; CHECK-NEXT: v_readlane_b32 s65, v0, 14 +; CHECK-NEXT: v_readlane_b32 s64, v0, 13 +; CHECK-NEXT: v_readlane_b32 s63, v0, 12 +; CHECK-NEXT: v_readlane_b32 s62, v0, 11 +; CHECK-NEXT: v_readlane_b32 s53, v0, 10 +; CHECK-NEXT: v_readlane_b32 s52, v0, 9 +; CHECK-NEXT: v_readlane_b32 s51, v0, 8 +; CHECK-NEXT: v_readlane_b32 s50, v0, 7 +; CHECK-NEXT: v_readlane_b32 s49, v0, 6 +; CHECK-NEXT: v_readlane_b32 s48, v0, 5 +; CHECK-NEXT: v_readlane_b32 s47, v0, 4 +; CHECK-NEXT: v_readlane_b32 s46, v0, 3 ; CHECK-NEXT: v_readlane_b32 s37, v0, 2 ; CHECK-NEXT: v_readlane_b32 s36, v0, 1 ; CHECK-NEXT: v_readlane_b32 s35, v0, 0 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -172,152 +106,86 @@ define void @spill_more_than_wavesize_csr_sgprs_with_stack_object() { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: v_writelane_b32 v1, s35, 0 ; CHECK-NEXT: v_writelane_b32 v1, s36, 1 ; CHECK-NEXT: v_writelane_b32 v1, s37, 2 -; CHECK-NEXT: v_writelane_b32 v1, s38, 3 -; CHECK-NEXT: v_writelane_b32 v1, s39, 4 -; CHECK-NEXT: v_writelane_b32 v1, s40, 5 -; CHECK-NEXT: v_writelane_b32 v1, s41, 6 -; CHECK-NEXT: v_writelane_b32 v1, s42, 7 -; CHECK-NEXT: v_writelane_b32 v1, s43, 8 -; CHECK-NEXT: v_writelane_b32 v1, s44, 9 -; CHECK-NEXT: v_writelane_b32 v1, s45, 10 -; CHECK-NEXT: v_writelane_b32 v1, s46, 11 -; CHECK-NEXT: v_writelane_b32 v1, s47, 12 -; CHECK-NEXT: v_writelane_b32 v1, s48, 13 -; CHECK-NEXT: v_writelane_b32 v1, s49, 14 -; CHECK-NEXT: v_writelane_b32 v1, s50, 15 -; CHECK-NEXT: v_writelane_b32 v1, s51, 16 -; CHECK-NEXT: v_writelane_b32 v1, s52, 17 -; CHECK-NEXT: v_writelane_b32 v1, s53, 18 -; CHECK-NEXT: v_writelane_b32 v1, s54, 19 -; CHECK-NEXT: v_writelane_b32 v1, s55, 20 -; CHECK-NEXT: v_writelane_b32 v1, s56, 21 -; CHECK-NEXT: v_writelane_b32 v1, s57, 22 -; CHECK-NEXT: v_writelane_b32 v1, s58, 23 -; CHECK-NEXT: v_writelane_b32 v1, s59, 24 -; CHECK-NEXT: v_writelane_b32 v1, s60, 25 -; CHECK-NEXT: v_writelane_b32 v1, s61, 26 -; CHECK-NEXT: v_writelane_b32 v1, s62, 27 -; CHECK-NEXT: v_writelane_b32 v1, s63, 28 -; CHECK-NEXT: v_writelane_b32 v1, s64, 29 -; CHECK-NEXT: v_writelane_b32 v1, s65, 30 -; CHECK-NEXT: v_writelane_b32 v1, s66, 31 -; CHECK-NEXT: v_writelane_b32 v1, s67, 32 -; CHECK-NEXT: v_writelane_b32 v1, s68, 33 -; CHECK-NEXT: v_writelane_b32 v1, s69, 34 -; CHECK-NEXT: v_writelane_b32 v1, s70, 35 -; CHECK-NEXT: v_writelane_b32 v1, s71, 36 -; CHECK-NEXT: v_writelane_b32 v1, s72, 37 -; CHECK-NEXT: v_writelane_b32 v1, s73, 38 -; CHECK-NEXT: v_writelane_b32 v1, s74, 39 -; CHECK-NEXT: v_writelane_b32 v1, s75, 40 -; CHECK-NEXT: v_writelane_b32 v1, s76, 41 -; CHECK-NEXT: v_writelane_b32 v1, s77, 42 -; CHECK-NEXT: v_writelane_b32 v1, s78, 43 -; CHECK-NEXT: v_writelane_b32 v1, s79, 44 -; CHECK-NEXT: v_writelane_b32 v1, s80, 45 -; CHECK-NEXT: v_writelane_b32 v1, s81, 46 -; CHECK-NEXT: v_writelane_b32 v1, s82, 47 -; CHECK-NEXT: v_writelane_b32 v1, s83, 48 -; CHECK-NEXT: v_writelane_b32 v1, s84, 49 -; CHECK-NEXT: v_writelane_b32 v1, s85, 50 -; CHECK-NEXT: v_writelane_b32 v1, s86, 51 -; CHECK-NEXT: v_writelane_b32 v1, s87, 52 -; CHECK-NEXT: v_writelane_b32 v1, s88, 53 -; CHECK-NEXT: v_writelane_b32 v1, s89, 54 -; CHECK-NEXT: v_writelane_b32 v1, s90, 55 -; CHECK-NEXT: v_writelane_b32 v1, s91, 56 -; CHECK-NEXT: v_writelane_b32 v1, s92, 57 -; CHECK-NEXT: v_writelane_b32 v1, s93, 58 -; CHECK-NEXT: v_writelane_b32 v1, s94, 59 -; CHECK-NEXT: v_writelane_b32 v1, s95, 60 -; CHECK-NEXT: v_writelane_b32 v2, s99, 0 -; CHECK-NEXT: v_writelane_b32 v1, s96, 61 -; CHECK-NEXT: v_writelane_b32 v2, s100, 1 -; CHECK-NEXT: v_writelane_b32 v1, s97, 62 -; CHECK-NEXT: v_writelane_b32 v2, s101, 2 +; CHECK-NEXT: v_writelane_b32 v1, s46, 3 +; CHECK-NEXT: v_writelane_b32 v1, s47, 4 +; CHECK-NEXT: v_writelane_b32 v1, s48, 5 +; CHECK-NEXT: v_writelane_b32 v1, s49, 6 +; CHECK-NEXT: v_writelane_b32 v1, s50, 7 +; CHECK-NEXT: v_writelane_b32 v1, s51, 8 +; CHECK-NEXT: v_writelane_b32 v1, s52, 9 +; CHECK-NEXT: v_writelane_b32 v1, s53, 10 +; CHECK-NEXT: v_writelane_b32 v1, s62, 11 +; CHECK-NEXT: v_writelane_b32 v1, s63, 12 +; CHECK-NEXT: v_writelane_b32 v1, s64, 13 +; CHECK-NEXT: v_writelane_b32 v1, s65, 14 +; CHECK-NEXT: v_writelane_b32 v1, s66, 15 +; CHECK-NEXT: v_writelane_b32 v1, s67, 16 +; CHECK-NEXT: v_writelane_b32 v1, s68, 17 +; CHECK-NEXT: v_writelane_b32 v1, s69, 18 +; CHECK-NEXT: v_writelane_b32 v1, s78, 19 +; CHECK-NEXT: v_writelane_b32 v1, s79, 20 +; CHECK-NEXT: v_writelane_b32 v1, s80, 21 +; CHECK-NEXT: v_writelane_b32 v1, s81, 22 +; CHECK-NEXT: v_writelane_b32 v1, s82, 23 +; CHECK-NEXT: v_writelane_b32 v1, s83, 24 +; CHECK-NEXT: v_writelane_b32 v1, s84, 25 +; CHECK-NEXT: v_writelane_b32 v1, s85, 26 +; CHECK-NEXT: v_writelane_b32 v1, s94, 27 +; CHECK-NEXT: v_writelane_b32 v1, s95, 28 +; CHECK-NEXT: v_writelane_b32 v1, s96, 29 +; CHECK-NEXT: v_writelane_b32 v1, s97, 30 +; CHECK-NEXT: v_writelane_b32 v1, s98, 31 +; CHECK-NEXT: v_writelane_b32 v1, s99, 32 +; CHECK-NEXT: v_writelane_b32 v1, s100, 33 +; CHECK-NEXT: v_writelane_b32 v1, s101, 34 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: v_writelane_b32 v1, s98, 63 -; CHECK-NEXT: v_writelane_b32 v2, s102, 3 +; CHECK-NEXT: v_writelane_b32 v1, s102, 35 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s102, v2, 3 -; CHECK-NEXT: v_readlane_b32 s101, v2, 2 -; CHECK-NEXT: v_readlane_b32 s100, v2, 1 -; CHECK-NEXT: v_readlane_b32 s99, v2, 0 -; CHECK-NEXT: v_readlane_b32 s98, v1, 63 -; CHECK-NEXT: v_readlane_b32 s97, v1, 62 -; CHECK-NEXT: v_readlane_b32 s96, v1, 61 -; CHECK-NEXT: v_readlane_b32 s95, v1, 60 -; CHECK-NEXT: v_readlane_b32 s94, v1, 59 -; CHECK-NEXT: v_readlane_b32 s93, v1, 58 -; CHECK-NEXT: v_readlane_b32 s92, v1, 57 -; CHECK-NEXT: v_readlane_b32 s91, v1, 56 -; CHECK-NEXT: v_readlane_b32 s90, v1, 55 -; CHECK-NEXT: v_readlane_b32 s89, v1, 54 -; CHECK-NEXT: v_readlane_b32 s88, v1, 53 -; CHECK-NEXT: v_readlane_b32 s87, v1, 52 -; CHECK-NEXT: v_readlane_b32 s86, v1, 51 -; CHECK-NEXT: v_readlane_b32 s85, v1, 50 -; CHECK-NEXT: v_readlane_b32 s84, v1, 49 -; CHECK-NEXT: v_readlane_b32 s83, v1, 48 -; CHECK-NEXT: v_readlane_b32 s82, v1, 47 -; CHECK-NEXT: v_readlane_b32 s81, v1, 46 -; CHECK-NEXT: v_readlane_b32 s80, v1, 45 -; CHECK-NEXT: v_readlane_b32 s79, v1, 44 -; CHECK-NEXT: v_readlane_b32 s78, v1, 43 -; CHECK-NEXT: v_readlane_b32 s77, v1, 42 -; CHECK-NEXT: v_readlane_b32 s76, v1, 41 -; CHECK-NEXT: v_readlane_b32 s75, v1, 40 -; CHECK-NEXT: v_readlane_b32 s74, v1, 39 -; CHECK-NEXT: v_readlane_b32 s73, v1, 38 -; CHECK-NEXT: v_readlane_b32 s72, v1, 37 -; CHECK-NEXT: v_readlane_b32 s71, v1, 36 -; CHECK-NEXT: v_readlane_b32 s70, v1, 35 -; CHECK-NEXT: v_readlane_b32 s69, v1, 34 -; CHECK-NEXT: v_readlane_b32 s68, v1, 33 -; CHECK-NEXT: v_readlane_b32 s67, v1, 32 -; CHECK-NEXT: v_readlane_b32 s66, v1, 31 -; CHECK-NEXT: v_readlane_b32 s65, v1, 30 -; CHECK-NEXT: v_readlane_b32 s64, v1, 29 -; CHECK-NEXT: v_readlane_b32 s63, v1, 28 -; CHECK-NEXT: v_readlane_b32 s62, v1, 27 -; CHECK-NEXT: v_readlane_b32 s61, v1, 26 -; CHECK-NEXT: v_readlane_b32 s60, v1, 25 -; CHECK-NEXT: v_readlane_b32 s59, v1, 24 -; CHECK-NEXT: v_readlane_b32 s58, v1, 23 -; CHECK-NEXT: v_readlane_b32 s57, v1, 22 -; CHECK-NEXT: v_readlane_b32 s56, v1, 21 -; CHECK-NEXT: v_readlane_b32 s55, v1, 20 -; CHECK-NEXT: v_readlane_b32 s54, v1, 19 -; CHECK-NEXT: v_readlane_b32 s53, v1, 18 -; CHECK-NEXT: v_readlane_b32 s52, v1, 17 -; CHECK-NEXT: v_readlane_b32 s51, v1, 16 -; CHECK-NEXT: v_readlane_b32 s50, v1, 15 -; CHECK-NEXT: v_readlane_b32 s49, v1, 14 -; CHECK-NEXT: v_readlane_b32 s48, v1, 13 -; CHECK-NEXT: v_readlane_b32 s47, v1, 12 -; CHECK-NEXT: v_readlane_b32 s46, v1, 11 -; CHECK-NEXT: v_readlane_b32 s45, v1, 10 -; CHECK-NEXT: v_readlane_b32 s44, v1, 9 -; CHECK-NEXT: v_readlane_b32 s43, v1, 8 -; CHECK-NEXT: v_readlane_b32 s42, v1, 7 -; CHECK-NEXT: v_readlane_b32 s41, v1, 6 -; CHECK-NEXT: v_readlane_b32 s40, v1, 5 -; CHECK-NEXT: v_readlane_b32 s39, v1, 4 -; CHECK-NEXT: v_readlane_b32 s38, v1, 3 +; CHECK-NEXT: v_readlane_b32 s102, v1, 35 +; CHECK-NEXT: v_readlane_b32 s101, v1, 34 +; CHECK-NEXT: v_readlane_b32 s100, v1, 33 +; CHECK-NEXT: v_readlane_b32 s99, v1, 32 +; CHECK-NEXT: v_readlane_b32 s98, v1, 31 +; CHECK-NEXT: v_readlane_b32 s97, v1, 30 +; CHECK-NEXT: v_readlane_b32 s96, v1, 29 +; CHECK-NEXT: v_readlane_b32 s95, v1, 28 +; CHECK-NEXT: v_readlane_b32 s94, v1, 27 +; CHECK-NEXT: v_readlane_b32 s85, v1, 26 +; CHECK-NEXT: v_readlane_b32 s84, v1, 25 +; CHECK-NEXT: v_readlane_b32 s83, v1, 24 +; CHECK-NEXT: v_readlane_b32 s82, v1, 23 +; CHECK-NEXT: v_readlane_b32 s81, v1, 22 +; CHECK-NEXT: v_readlane_b32 s80, v1, 21 +; CHECK-NEXT: v_readlane_b32 s79, v1, 20 +; CHECK-NEXT: v_readlane_b32 s78, v1, 19 +; CHECK-NEXT: v_readlane_b32 s69, v1, 18 +; CHECK-NEXT: v_readlane_b32 s68, v1, 17 +; CHECK-NEXT: v_readlane_b32 s67, v1, 16 +; CHECK-NEXT: v_readlane_b32 s66, v1, 15 +; CHECK-NEXT: v_readlane_b32 s65, v1, 14 +; CHECK-NEXT: v_readlane_b32 s64, v1, 13 +; CHECK-NEXT: v_readlane_b32 s63, v1, 12 +; CHECK-NEXT: v_readlane_b32 s62, v1, 11 +; CHECK-NEXT: v_readlane_b32 s53, v1, 10 +; CHECK-NEXT: v_readlane_b32 s52, v1, 9 +; CHECK-NEXT: v_readlane_b32 s51, v1, 8 +; CHECK-NEXT: v_readlane_b32 s50, v1, 7 +; CHECK-NEXT: v_readlane_b32 s49, v1, 6 +; CHECK-NEXT: v_readlane_b32 s48, v1, 5 +; CHECK-NEXT: v_readlane_b32 s47, v1, 4 +; CHECK-NEXT: v_readlane_b32 s46, v1, 3 ; CHECK-NEXT: v_readlane_b32 s37, v1, 2 ; CHECK-NEXT: v_readlane_b32 s36, v1, 1 ; CHECK-NEXT: v_readlane_b32 s35, v1, 0 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir index 8f53ec2f992da..359152e9d2b45 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -17,70 +17,78 @@ body: | ; RA-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; RA-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_1024 = S_MOV_B32 -1 ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_1024 = S_MOV_B32 -1 + ; RA-NEXT: SI_SPILL_S1024_SAVE [[S_MOV_B32_]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5) ; RA-NEXT: undef [[S_MOV_B32_1:%[0-9]+]].sub0:sgpr_1024 = S_MOV_B32 0 + ; RA-NEXT: SI_SPILL_S1024_SAVE [[S_MOV_B32_1]], %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5) ; RA-NEXT: {{ $}} ; RA-NEXT: bb.1: ; RA-NEXT: successors: %bb.2(0x80000000) ; RA-NEXT: {{ $}} - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub4:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub5:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub6:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub7:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub8:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub9:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub10:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub11:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub12:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub13:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub14:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub15:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub16:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub17:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub18:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub19:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub20:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub21:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub22:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub23:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub24:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub25:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub26:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub27:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub28:sgpr_1024 = COPY [[S_MOV_B32_]].sub0 - ; RA-NEXT: [[S_MOV_B32_:%[0-9]+]].sub29:sgpr_1024 = COPY [[S_MOV_B32_]].sub1 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub1:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub2:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub3:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub4:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub5:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub6:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub7:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub8:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub9:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub10:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub11:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub12:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub13:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub14:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub15:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub16:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub17:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub18:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub19:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub20:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub21:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub22:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub23:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub24:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub25:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub26:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub27:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub28:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub29:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub30:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 - ; RA-NEXT: [[S_MOV_B32_1:%[0-9]+]].sub31:sgpr_1024 = COPY [[S_MOV_B32_1]].sub0 + ; RA-NEXT: [[SI_SPILL_S1024_RESTORE:%[0-9]+]]:sgpr_1024 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; RA-NEXT: undef [[COPY:%[0-9]+]].sub0_sub1:sgpr_1024 = COPY [[SI_SPILL_S1024_RESTORE]].sub0_sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub2:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub3:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub4:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub5:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub6:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub7:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub8:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub9:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub10:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub11:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub12:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub13:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub14:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub15:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub16:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub17:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub18:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub19:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub20:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub21:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub22:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub23:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub24:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub25:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub26:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub27:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: [[COPY:%[0-9]+]].sub28:sgpr_1024 = COPY [[COPY]].sub0 + ; RA-NEXT: [[COPY:%[0-9]+]].sub29:sgpr_1024 = COPY [[COPY]].sub1 + ; RA-NEXT: SI_SPILL_S1024_SAVE [[COPY]], %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; RA-NEXT: [[SI_SPILL_S1024_RESTORE1:%[0-9]+]]:sgpr_1024 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; RA-NEXT: undef [[COPY1:%[0-9]+]].sub0:sgpr_1024 = COPY [[SI_SPILL_S1024_RESTORE1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub1:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub2:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub3:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub4:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub5:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub6:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub7:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub8:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub9:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub10:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub11:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub12:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub13:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub14:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub15:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub16:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub17:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub18:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub19:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub20:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub21:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub22:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub23:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub24:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub25:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub26:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub27:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub28:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub29:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub30:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: [[COPY1:%[0-9]+]].sub31:sgpr_1024 = COPY [[COPY1]].sub0 + ; RA-NEXT: SI_SPILL_S1024_SAVE [[COPY1]], %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5) ; RA-NEXT: {{ $}} ; RA-NEXT: bb.2: ; RA-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) @@ -95,14 +103,17 @@ body: | ; VR-NEXT: {{ $}} ; VR-NEXT: renamable $sgpr37 = S_MOV_B32 -1 ; VR-NEXT: renamable $sgpr36 = S_MOV_B32 -1 - ; VR-NEXT: renamable $sgpr68 = S_MOV_B32 0 + ; VR-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; VR-NEXT: renamable $sgpr36 = S_MOV_B32 0 ; VR-NEXT: renamable $sgpr30_sgpr31 = IMPLICIT_DEF ; VR-NEXT: renamable $sgpr34_sgpr35 = IMPLICIT_DEF + ; VR-NEXT: SI_SPILL_S1024_SAVE killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5) ; VR-NEXT: {{ $}} ; VR-NEXT: bb.1: ; VR-NEXT: successors: %bb.2(0x80000000) - ; VR-NEXT: liveins: $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003 + ; VR-NEXT: liveins: $sgpr30_sgpr31, $sgpr34_sgpr35 ; VR-NEXT: {{ $}} + ; VR-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5) ; VR-NEXT: renamable $sgpr38 = COPY renamable $sgpr36 ; VR-NEXT: renamable $sgpr39 = COPY renamable $sgpr37 ; VR-NEXT: renamable $sgpr40 = COPY renamable $sgpr36 @@ -131,41 +142,44 @@ body: | ; VR-NEXT: renamable $sgpr63 = COPY renamable $sgpr37 ; VR-NEXT: renamable $sgpr64 = COPY renamable $sgpr36 ; VR-NEXT: renamable $sgpr65 = COPY renamable $sgpr37 - ; VR-NEXT: renamable $sgpr69 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr70 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr71 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr72 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr73 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr74 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr75 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr76 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr77 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr78 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr79 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr80 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr81 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr82 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr83 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr84 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr85 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr86 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr87 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr88 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr89 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr90 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr91 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr92 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr93 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr94 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr95 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr96 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr97 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr98 = COPY renamable $sgpr68 - ; VR-NEXT: renamable $sgpr99 = COPY renamable $sgpr68 + ; VR-NEXT: SI_SPILL_S1024_SAVE killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; VR-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; VR-NEXT: renamable $sgpr37 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr38 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr39 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr40 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr41 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr42 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr43 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr44 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr45 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr46 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr47 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr48 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr49 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr50 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr51 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr52 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr53 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr54 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr55 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr56 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr57 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr58 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr59 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr60 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr61 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr62 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr63 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr64 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr65 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr66 = COPY renamable $sgpr36 + ; VR-NEXT: renamable $sgpr67 = COPY renamable $sgpr36 + ; VR-NEXT: SI_SPILL_S1024_SAVE killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5) ; VR-NEXT: {{ $}} ; VR-NEXT: bb.2: ; VR-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; VR-NEXT: liveins: $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003 + ; VR-NEXT: liveins: $sgpr30_sgpr31, $sgpr34_sgpr35 ; VR-NEXT: {{ $}} ; VR-NEXT: S_NOP 0, csr_amdgpu, implicit renamable $sgpr30_sgpr31, implicit renamable $sgpr34_sgpr35 ; VR-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll index d4d3b37a0ed1e..89bb346ee98df 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -11,12 +11,12 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr ; MUBUF-LABEL: kernel_background_evaluate: ; MUBUF: ; %bb.0: ; %entry ; MUBUF-NEXT: s_load_dword s0, s[4:5], 0x24 -; MUBUF-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 -; MUBUF-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 -; MUBUF-NEXT: s_mov_b32 s38, -1 -; MUBUF-NEXT: s_mov_b32 s39, 0x31c16000 -; MUBUF-NEXT: s_add_u32 s36, s36, s11 -; MUBUF-NEXT: s_addc_u32 s37, s37, 0 +; MUBUF-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; MUBUF-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; MUBUF-NEXT: s_mov_b32 s50, -1 +; MUBUF-NEXT: s_mov_b32 s51, 0x31c16000 +; MUBUF-NEXT: s_add_u32 s48, s48, s11 +; MUBUF-NEXT: s_addc_u32 s49, s49, 0 ; MUBUF-NEXT: v_mov_b32_e32 v1, 0x2000 ; MUBUF-NEXT: v_mov_b32_e32 v2, 0x4000 ; MUBUF-NEXT: v_mov_b32_e32 v3, 0 @@ -27,8 +27,8 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr ; MUBUF-NEXT: s_mov_b32 s32, 0xc0000 ; MUBUF-NEXT: s_waitcnt lgkmcnt(0) ; MUBUF-NEXT: v_mov_b32_e32 v0, s0 -; MUBUF-NEXT: s_mov_b64 s[0:1], s[36:37] -; MUBUF-NEXT: s_mov_b64 s[2:3], s[38:39] +; MUBUF-NEXT: s_mov_b64 s[0:1], s[48:49] +; MUBUF-NEXT: s_mov_b64 s[2:3], s[50:51] ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] ; MUBUF-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; MUBUF-NEXT: s_and_saveexec_b32 s0, vcc_lo @@ -37,12 +37,12 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr ; MUBUF-NEXT: v_mov_b32_e32 v0, 0x4004 ; MUBUF-NEXT: s_mov_b32 s0, 0x41c64e6d ; MUBUF-NEXT: s_clause 0x1 -; MUBUF-NEXT: buffer_load_dword v1, v0, s[36:39], 0 offen -; MUBUF-NEXT: buffer_load_dword v2, v0, s[36:39], 0 offen offset:4 +; MUBUF-NEXT: buffer_load_dword v1, v0, s[48:51], 0 offen +; MUBUF-NEXT: buffer_load_dword v2, v0, s[48:51], 0 offen offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: v_add_nc_u32_e32 v0, v2, v1 ; MUBUF-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039 -; MUBUF-NEXT: buffer_store_dword v0, v0, s[36:39], 0 offen +; MUBUF-NEXT: buffer_store_dword v0, v0, s[48:51], 0 offen ; MUBUF-NEXT: .LBB0_2: ; %shader_eval_surface.exit ; MUBUF-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll index 0e568e3071e99..3a078a64aa28e 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function no_free_scratch_sgpr_for_bp_copy --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Check that we properly realign the stack. While 4-byte access is all @@ -415,28 +416,21 @@ define void @no_free_scratch_sgpr_for_bp_copy(<32 x i32> %a, i32 %b) #0 { ; GCN-LABEL: no_free_scratch_sgpr_for_bp_copy: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 vcc_lo, s33 -; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0 -; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000 -; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v1, s34, 0 +; GCN-NEXT: s_mov_b32 s39, s34 ; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 +; GCN-NEXT: s_mov_b32 s38, s33 +; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0 +; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000 ; GCN-NEXT: s_addk_i32 s32, 0x6000 ; GCN-NEXT: s_mov_b32 s32, s34 -; GCN-NEXT: v_readlane_b32 s34, v1, 0 +; GCN-NEXT: s_mov_b32 s34, s39 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:128 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_mov_b32 s33, vcc_lo -; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_mov_b32 s33, s38 ; GCN-NEXT: s_setpc_b64 s[30:31] %local_val = alloca i32, align 128, addrspace(5) store volatile i32 %b, ptr addrspace(5) %local_val, align 128 diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll index e3a7f5eee74cb..f7300c921a745 100644 --- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll +++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll @@ -32,24 +32,24 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-LABEL: kernel: ; GLOBALNESS1: ; %bb.0: ; %bb ; GLOBALNESS1-NEXT: s_mov_b64 s[36:37], s[6:7] -; GLOBALNESS1-NEXT: s_load_dwordx4 s[76:79], s[8:9], 0x0 +; GLOBALNESS1-NEXT: s_load_dwordx4 s[96:99], s[8:9], 0x0 ; GLOBALNESS1-NEXT: s_load_dword s6, s[8:9], 0x14 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v42, 0 -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[44:45], 0, 0 -; GLOBALNESS1-NEXT: global_store_dword v[44:45], v42, off +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 +; GLOBALNESS1-NEXT: global_store_dword v[0:1], v42, off ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[76:77] -; GLOBALNESS1-NEXT: s_mov_b64 s[40:41], s[4:5] +; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[96:97] +; GLOBALNESS1-NEXT: s_mov_b64 s[48:49], s[4:5] ; GLOBALNESS1-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18 ; GLOBALNESS1-NEXT: s_load_dword s7, s[8:9], 0x20 ; GLOBALNESS1-NEXT: s_add_u32 flat_scratch_lo, s12, s17 ; GLOBALNESS1-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 ; GLOBALNESS1-NEXT: s_add_u32 s0, s0, s17 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS1-NEXT: s_addc_u32 s1, s1, 0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x40994400 -; GLOBALNESS1-NEXT: s_bitcmp1_b32 s78, 0 +; GLOBALNESS1-NEXT: s_bitcmp1_b32 s98, 0 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e32 vcc, s[4:5], v[0:1] ; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[4:5], 0 @@ -59,89 +59,102 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GLOBALNESS1-NEXT: s_bitcmp1_b32 s6, 0 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[42:43], 1, v0 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[50:51], 1, v0 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GLOBALNESS1-NEXT: s_bitcmp1_b32 s7, 0 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[48:49], 1, v0 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[64:65], 1, v0 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[50:51], 1, v0 +; GLOBALNESS1-NEXT: s_mov_b64 s[46:47], s[8:9] +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1 +; GLOBALNESS1-NEXT: ; implicit-def: $vgpr56 : SGPR spill to VGPR lane +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[52:53], 1, v0 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[44:45], 1, v1 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[46:47], 1, v3 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v46, 0x80 -; GLOBALNESS1-NEXT: s_mov_b32 s70, s16 -; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[8:9] -; GLOBALNESS1-NEXT: s_mov_b32 s71, s15 -; GLOBALNESS1-NEXT: s_mov_b32 s72, s14 +; GLOBALNESS1-NEXT: v_writelane_b32 v56, s8, 0 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[68:69], 1, v0 +; GLOBALNESS1-NEXT: v_writelane_b32 v56, s9, 1 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[62:63], 1, v3 +; GLOBALNESS1-NEXT: s_mov_b32 s82, s16 +; GLOBALNESS1-NEXT: s_mov_b32 s83, s15 +; GLOBALNESS1-NEXT: s_mov_b32 s84, s14 ; GLOBALNESS1-NEXT: s_mov_b64 s[34:35], s[10:11] -; GLOBALNESS1-NEXT: v_mov_b32_e32 v47, 0 ; GLOBALNESS1-NEXT: s_mov_b32 s32, 0 -; GLOBALNESS1-NEXT: ; implicit-def: $vgpr56_vgpr57 +; GLOBALNESS1-NEXT: ; implicit-def: $vgpr44_vgpr45 ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) ; GLOBALNESS1-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GLOBALNESS1-NEXT: v_cmp_gt_i32_e32 vcc, 1, v2 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; GLOBALNESS1-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GLOBALNESS1-NEXT: v_writelane_b32 v56, s4, 2 ; GLOBALNESS1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GLOBALNESS1-NEXT: v_writelane_b32 v56, s5, 3 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v3 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[54:55], 1, v0 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[56:57], 1, v1 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[58:59], 1, v3 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[60:61], 1, v2 +; GLOBALNESS1-NEXT: v_writelane_b32 v56, s4, 4 +; GLOBALNESS1-NEXT: v_writelane_b32 v56, s5, 5 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v2 +; GLOBALNESS1-NEXT: v_writelane_b32 v56, s4, 6 +; GLOBALNESS1-NEXT: v_writelane_b32 v56, s5, 7 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[80:81], 1, v1 +; GLOBALNESS1-NEXT: v_writelane_b32 v56, s62, 8 +; GLOBALNESS1-NEXT: v_writelane_b32 v56, s63, 9 ; GLOBALNESS1-NEXT: s_branch .LBB1_4 ; GLOBALNESS1-NEXT: .LBB1_1: ; %bb70.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[60:61] -; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_28 +; GLOBALNESS1-NEXT: v_readlane_b32 s6, v56, 6 +; GLOBALNESS1-NEXT: v_readlane_b32 s7, v56, 7 +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_29 ; GLOBALNESS1-NEXT: .LBB1_2: ; %Flow15 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5] -; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], 0 ; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS1-NEXT: .LBB1_3: ; %Flow28 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[8:9] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[56:57], v[0:1], v[0:1] op_sel:[0,1] -; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_29 +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[44:45], v[0:1], v[0:1] op_sel:[0,1] +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_30 ; GLOBALNESS1-NEXT: .LBB1_4: ; %bb5 ; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1 ; GLOBALNESS1-NEXT: ; Child Loop BB1_16 Depth 2 -; GLOBALNESS1-NEXT: flat_load_dword v40, v[46:47] -; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0x80 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0 +; GLOBALNESS1-NEXT: flat_load_dword v40, v[0:1] +; GLOBALNESS1-NEXT: s_add_u32 s8, s46, 40 ; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0 -; GLOBALNESS1-NEXT: flat_load_dword v58, v[46:47] -; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS1-NEXT: flat_load_dword v46, v[0:1] +; GLOBALNESS1-NEXT: s_addc_u32 s9, s47, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 ; GLOBALNESS1-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS1-NEXT: s_mov_b32 s12, s72 -; GLOBALNESS1-NEXT: s_mov_b32 s13, s71 -; GLOBALNESS1-NEXT: s_mov_b32 s14, s70 +; GLOBALNESS1-NEXT: s_mov_b32 s12, s84 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s83 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[46:47] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[62:63] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1 ; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_9 ; GLOBALNESS1-NEXT: ; %bb.5: ; %NodeBlock ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_cmp_lt_i32 s79, 1 +; GLOBALNESS1-NEXT: s_cmp_lt_i32 s99, 1 ; GLOBALNESS1-NEXT: s_cbranch_scc1 .LBB1_7 ; GLOBALNESS1-NEXT: ; %bb.6: ; %LeafBlock12 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_cmp_lg_u32 s79, 1 +; GLOBALNESS1-NEXT: s_cmp_lg_u32 s99, 1 ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_8 @@ -151,190 +164,206 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS1-NEXT: .LBB1_8: ; %LeafBlock ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_cmp_lg_u32 s79, 0 +; GLOBALNESS1-NEXT: s_cmp_lg_u32 s99, 0 ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0 ; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS1-NEXT: .LBB1_9: ; %Flow25 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], -1 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] -; GLOBALNESS1-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_3 +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_24 ; GLOBALNESS1-NEXT: ; %bb.10: ; %baz.exit.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: flat_load_dword v0, v[44:45] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 +; GLOBALNESS1-NEXT: flat_load_dword v0, v[2:3] ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[62:63], 0, v0 +; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[94:95], 0, v0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x3ff00000 -; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[74:75], s[62:63] -; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_25 +; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[52:53], s[94:95] +; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26 ; GLOBALNESS1-NEXT: ; %bb.11: ; %bb33.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[44:45], off -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[54:55] +; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[2:3], off +; GLOBALNESS1-NEXT: v_readlane_b32 s4, v56, 2 +; GLOBALNESS1-NEXT: v_readlane_b32 s5, v56, 3 +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[4:5] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_13 ; GLOBALNESS1-NEXT: ; %bb.12: ; %bb39.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 +; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[42:43], off ; GLOBALNESS1-NEXT: .LBB1_13: ; %bb44.lr.ph.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v58 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46 ; GLOBALNESS1-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) ; GLOBALNESS1-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1] ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[64:65], 0, v2 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0 +; GLOBALNESS1-NEXT: v_cmp_eq_u32_e64 s[96:97], 0, v2 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[98:99], 1, v0 ; GLOBALNESS1-NEXT: s_branch .LBB1_16 ; GLOBALNESS1-NEXT: .LBB1_14: ; %Flow16 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 ; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5] ; GLOBALNESS1-NEXT: .LBB1_15: ; %bb63.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[52:53] -; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_24 +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[68:69] +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_25 ; GLOBALNESS1-NEXT: .LBB1_16: ; %bb44.i ; GLOBALNESS1-NEXT: ; Parent Loop BB1_4 Depth=1 ; GLOBALNESS1-NEXT: ; => This Inner Loop Header: Depth=2 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[48:49] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[64:65] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS1-NEXT: ; %bb.17: ; %bb46.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[50:51] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[66:67] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS1-NEXT: ; %bb.18: ; %bb50.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[42:43] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[50:51] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_21 ; GLOBALNESS1-NEXT: ; %bb.19: ; %bb3.i.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[44:45] +; GLOBALNESS1-NEXT: v_readlane_b32 s4, v56, 0 +; GLOBALNESS1-NEXT: v_readlane_b32 s5, v56, 1 +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[4:5] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_21 ; GLOBALNESS1-NEXT: ; %bb.20: ; %bb6.i.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[66:67] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[98:99] ; GLOBALNESS1-NEXT: .LBB1_21: ; %spam.exit.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[56:57] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[80:81] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS1-NEXT: ; %bb.22: ; %bb55.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: s_add_u32 s68, s38, 40 -; GLOBALNESS1-NEXT: s_addc_u32 s69, s39, 0 +; GLOBALNESS1-NEXT: s_add_u32 s78, s46, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s79, s47, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 -; GLOBALNESS1-NEXT: s_load_dwordx2 s[76:77], s[4:5], 0x0 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS1-NEXT: s_load_dwordx2 s[62:63], s[4:5], 0x0 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] -; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[68:69] +; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[78:79] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS1-NEXT: s_mov_b32 s12, s72 -; GLOBALNESS1-NEXT: s_mov_b32 s13, s71 -; GLOBALNESS1-NEXT: s_mov_b32 s14, s70 +; GLOBALNESS1-NEXT: s_mov_b32 s12, s84 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s83 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[76:77] -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[62:63] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[46:47], 0, 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] -; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[68:69] +; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[78:79] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS1-NEXT: s_mov_b32 s12, s72 -; GLOBALNESS1-NEXT: s_mov_b32 s13, s71 -; GLOBALNESS1-NEXT: s_mov_b32 s14, s70 +; GLOBALNESS1-NEXT: s_mov_b32 s12, s84 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s83 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[56:57], off -; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[76:77] -; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[64:65] +; GLOBALNESS1-NEXT: global_store_dwordx2 v[46:47], v[44:45], off +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[62:63] +; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[96:97] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_14 ; GLOBALNESS1-NEXT: ; %bb.23: ; %bb62.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off +; GLOBALNESS1-NEXT: global_store_dwordx2 v[46:47], v[42:43], off ; GLOBALNESS1-NEXT: s_branch .LBB1_14 -; GLOBALNESS1-NEXT: .LBB1_24: ; %Flow23 +; GLOBALNESS1-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1 +; GLOBALNESS1-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GLOBALNESS1-NEXT: s_branch .LBB1_3 +; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow23 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_load_dwordx4 s[96:99], s[46:47], 0x0 +; GLOBALNESS1-NEXT: v_readlane_b32 s62, v56, 8 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow24 +; GLOBALNESS1-NEXT: v_readlane_b32 s63, v56, 9 +; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow24 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[74:75] -; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[62:63] +; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[52:53] +; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[94:95] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2 -; GLOBALNESS1-NEXT: ; %bb.26: ; %bb67.i +; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[58:59] +; GLOBALNESS1-NEXT: v_readlane_b32 s6, v56, 4 +; GLOBALNESS1-NEXT: v_readlane_b32 s7, v56, 5 +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_1 -; GLOBALNESS1-NEXT: ; %bb.27: ; %bb69.i +; GLOBALNESS1-NEXT: ; %bb.28: ; %bb69.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 +; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[42:43], off ; GLOBALNESS1-NEXT: s_branch .LBB1_1 -; GLOBALNESS1-NEXT: .LBB1_28: ; %bb73.i +; GLOBALNESS1-NEXT: .LBB1_29: ; %bb73.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 +; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[42:43], off ; GLOBALNESS1-NEXT: s_branch .LBB1_2 -; GLOBALNESS1-NEXT: .LBB1_29: ; %loop.exit.guard +; GLOBALNESS1-NEXT: .LBB1_30: ; %loop.exit.guard ; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1 -; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_31 -; GLOBALNESS1-NEXT: ; %bb.30: ; %bb7.i.i -; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 -; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_32 +; GLOBALNESS1-NEXT: ; %bb.31: ; %bb7.i.i +; GLOBALNESS1-NEXT: s_add_u32 s8, s46, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s9, s47, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17] ; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS1-NEXT: s_mov_b32 s12, s72 -; GLOBALNESS1-NEXT: s_mov_b32 s13, s71 -; GLOBALNESS1-NEXT: s_mov_b32 s14, s70 +; GLOBALNESS1-NEXT: s_mov_b32 s12, s84 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s83 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0 -; GLOBALNESS1-NEXT: .LBB1_31: ; %Flow +; GLOBALNESS1-NEXT: .LBB1_32: ; %Flow ; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] -; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_33 -; GLOBALNESS1-NEXT: ; %bb.32: ; %bb11.i.i -; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 -; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_34 +; GLOBALNESS1-NEXT: ; %bb.33: ; %bb11.i.i +; GLOBALNESS1-NEXT: s_add_u32 s8, s46, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s9, s47, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17] ; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 -; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS1-NEXT: s_mov_b32 s12, s72 -; GLOBALNESS1-NEXT: s_mov_b32 s13, s71 -; GLOBALNESS1-NEXT: s_mov_b32 s14, s70 +; GLOBALNESS1-NEXT: s_mov_b32 s12, s84 +; GLOBALNESS1-NEXT: s_mov_b32 s13, s83 +; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GLOBALNESS1-NEXT: .LBB1_33: ; %UnifiedUnreachableBlock +; GLOBALNESS1-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock ; ; GLOBALNESS0-LABEL: kernel: ; GLOBALNESS0: ; %bb.0: ; %bb ; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[6:7] -; GLOBALNESS0-NEXT: s_load_dwordx4 s[72:75], s[8:9], 0x0 +; GLOBALNESS0-NEXT: s_load_dwordx4 s[96:99], s[8:9], 0x0 ; GLOBALNESS0-NEXT: s_load_dword s6, s[8:9], 0x14 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v42, 0 -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[44:45], 0, 0 -; GLOBALNESS0-NEXT: global_store_dword v[44:45], v42, off +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 +; GLOBALNESS0-NEXT: global_store_dword v[0:1], v42, off ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[72:73] -; GLOBALNESS0-NEXT: s_mov_b64 s[40:41], s[4:5] +; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[96:97] +; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[4:5] ; GLOBALNESS0-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18 ; GLOBALNESS0-NEXT: s_load_dword s7, s[8:9], 0x20 ; GLOBALNESS0-NEXT: s_add_u32 flat_scratch_lo, s12, s17 ; GLOBALNESS0-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 ; GLOBALNESS0-NEXT: s_add_u32 s0, s0, s17 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS0-NEXT: s_addc_u32 s1, s1, 0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x40994400 -; GLOBALNESS0-NEXT: s_bitcmp1_b32 s74, 0 +; GLOBALNESS0-NEXT: s_bitcmp1_b32 s98, 0 ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e32 vcc, s[4:5], v[0:1] ; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[4:5], 0 @@ -344,89 +373,102 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GLOBALNESS0-NEXT: s_bitcmp1_b32 s6, 0 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[42:43], 1, v0 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[50:51], 1, v0 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GLOBALNESS0-NEXT: s_bitcmp1_b32 s7, 0 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[48:49], 1, v0 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[64:65], 1, v0 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[50:51], 1, v0 +; GLOBALNESS0-NEXT: s_mov_b64 s[46:47], s[8:9] +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1 +; GLOBALNESS0-NEXT: ; implicit-def: $vgpr56 : SGPR spill to VGPR lane +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[52:53], 1, v0 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[44:45], 1, v1 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[46:47], 1, v3 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v46, 0x80 -; GLOBALNESS0-NEXT: s_mov_b32 s68, s16 -; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[8:9] -; GLOBALNESS0-NEXT: s_mov_b32 s69, s15 -; GLOBALNESS0-NEXT: s_mov_b32 s70, s14 +; GLOBALNESS0-NEXT: v_writelane_b32 v56, s8, 0 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[68:69], 1, v0 +; GLOBALNESS0-NEXT: v_writelane_b32 v56, s9, 1 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[62:63], 1, v3 +; GLOBALNESS0-NEXT: s_mov_b32 s78, s16 +; GLOBALNESS0-NEXT: s_mov_b32 s79, s15 +; GLOBALNESS0-NEXT: s_mov_b32 s82, s14 ; GLOBALNESS0-NEXT: s_mov_b64 s[34:35], s[10:11] -; GLOBALNESS0-NEXT: v_mov_b32_e32 v47, 0 ; GLOBALNESS0-NEXT: s_mov_b32 s32, 0 -; GLOBALNESS0-NEXT: ; implicit-def: $vgpr56_vgpr57 +; GLOBALNESS0-NEXT: ; implicit-def: $vgpr44_vgpr45 ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) ; GLOBALNESS0-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc ; GLOBALNESS0-NEXT: v_cmp_gt_i32_e32 vcc, 1, v2 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; GLOBALNESS0-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GLOBALNESS0-NEXT: v_writelane_b32 v56, s4, 2 ; GLOBALNESS0-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; GLOBALNESS0-NEXT: v_writelane_b32 v56, s5, 3 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v3 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[54:55], 1, v0 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[56:57], 1, v1 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[58:59], 1, v3 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[60:61], 1, v2 +; GLOBALNESS0-NEXT: v_writelane_b32 v56, s4, 4 +; GLOBALNESS0-NEXT: v_writelane_b32 v56, s5, 5 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v2 +; GLOBALNESS0-NEXT: v_writelane_b32 v56, s4, 6 +; GLOBALNESS0-NEXT: v_writelane_b32 v56, s5, 7 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[80:81], 1, v1 +; GLOBALNESS0-NEXT: v_writelane_b32 v56, s62, 8 +; GLOBALNESS0-NEXT: v_writelane_b32 v56, s63, 9 ; GLOBALNESS0-NEXT: s_branch .LBB1_4 ; GLOBALNESS0-NEXT: .LBB1_1: ; %bb70.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[60:61] -; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_28 +; GLOBALNESS0-NEXT: v_readlane_b32 s6, v56, 6 +; GLOBALNESS0-NEXT: v_readlane_b32 s7, v56, 7 +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_29 ; GLOBALNESS0-NEXT: .LBB1_2: ; %Flow15 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5] -; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0 ; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS0-NEXT: .LBB1_3: ; %Flow28 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[8:9] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[56:57], v[0:1], v[0:1] op_sel:[0,1] -; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_29 +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[44:45], v[0:1], v[0:1] op_sel:[0,1] +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_30 ; GLOBALNESS0-NEXT: .LBB1_4: ; %bb5 ; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1 ; GLOBALNESS0-NEXT: ; Child Loop BB1_16 Depth 2 -; GLOBALNESS0-NEXT: flat_load_dword v40, v[46:47] -; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0x80 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0 +; GLOBALNESS0-NEXT: flat_load_dword v40, v[0:1] +; GLOBALNESS0-NEXT: s_add_u32 s8, s46, 40 ; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0 -; GLOBALNESS0-NEXT: flat_load_dword v58, v[46:47] -; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS0-NEXT: flat_load_dword v46, v[0:1] +; GLOBALNESS0-NEXT: s_addc_u32 s9, s47, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 ; GLOBALNESS0-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS0-NEXT: s_mov_b32 s12, s70 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s69 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s68 +; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s79 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s78 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[46:47] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[62:63] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1 ; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_9 ; GLOBALNESS0-NEXT: ; %bb.5: ; %NodeBlock ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_cmp_lt_i32 s75, 1 +; GLOBALNESS0-NEXT: s_cmp_lt_i32 s99, 1 ; GLOBALNESS0-NEXT: s_cbranch_scc1 .LBB1_7 ; GLOBALNESS0-NEXT: ; %bb.6: ; %LeafBlock12 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_cmp_lg_u32 s75, 1 +; GLOBALNESS0-NEXT: s_cmp_lg_u32 s99, 1 ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_8 @@ -436,175 +478,192 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS0-NEXT: .LBB1_8: ; %LeafBlock ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_cmp_lg_u32 s75, 0 +; GLOBALNESS0-NEXT: s_cmp_lg_u32 s99, 0 ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0 ; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS0-NEXT: .LBB1_9: ; %Flow25 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], -1 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] -; GLOBALNESS0-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_3 +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_24 ; GLOBALNESS0-NEXT: ; %bb.10: ; %baz.exit.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: flat_load_dword v0, v[44:45] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 +; GLOBALNESS0-NEXT: flat_load_dword v0, v[2:3] ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[62:63], 0, v0 +; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[94:95], 0, v0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x3ff00000 -; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[76:77], s[62:63] -; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_25 +; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[52:53], s[94:95] +; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26 ; GLOBALNESS0-NEXT: ; %bb.11: ; %bb33.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[44:45], off -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[54:55] +; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[2:3], off +; GLOBALNESS0-NEXT: v_readlane_b32 s4, v56, 2 +; GLOBALNESS0-NEXT: v_readlane_b32 s5, v56, 3 +; GLOBALNESS0-NEXT: s_mov_b32 s83, s99 +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[4:5] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_13 ; GLOBALNESS0-NEXT: ; %bb.12: ; %bb39.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 +; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[42:43], off ; GLOBALNESS0-NEXT: .LBB1_13: ; %bb44.lr.ph.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v58 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46 ; GLOBALNESS0-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) ; GLOBALNESS0-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1] ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[64:65], 0, v2 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0 +; GLOBALNESS0-NEXT: v_cmp_eq_u32_e64 s[96:97], 0, v2 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[98:99], 1, v0 ; GLOBALNESS0-NEXT: s_branch .LBB1_16 ; GLOBALNESS0-NEXT: .LBB1_14: ; %Flow16 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 ; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5] ; GLOBALNESS0-NEXT: .LBB1_15: ; %bb63.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[52:53] -; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_24 +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[68:69] +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_25 ; GLOBALNESS0-NEXT: .LBB1_16: ; %bb44.i ; GLOBALNESS0-NEXT: ; Parent Loop BB1_4 Depth=1 ; GLOBALNESS0-NEXT: ; => This Inner Loop Header: Depth=2 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[48:49] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[64:65] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS0-NEXT: ; %bb.17: ; %bb46.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[50:51] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[66:67] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS0-NEXT: ; %bb.18: ; %bb50.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[42:43] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[50:51] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_21 ; GLOBALNESS0-NEXT: ; %bb.19: ; %bb3.i.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[44:45] +; GLOBALNESS0-NEXT: v_readlane_b32 s4, v56, 0 +; GLOBALNESS0-NEXT: v_readlane_b32 s5, v56, 1 +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[4:5] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_21 ; GLOBALNESS0-NEXT: ; %bb.20: ; %bb6.i.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[66:67] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[98:99] ; GLOBALNESS0-NEXT: .LBB1_21: ; %spam.exit.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[56:57] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[80:81] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS0-NEXT: ; %bb.22: ; %bb55.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: s_add_u32 s72, s38, 40 -; GLOBALNESS0-NEXT: s_addc_u32 s73, s39, 0 +; GLOBALNESS0-NEXT: s_add_u32 s84, s46, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s85, s47, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 -; GLOBALNESS0-NEXT: s_load_dwordx2 s[78:79], s[4:5], 0x0 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS0-NEXT: s_load_dwordx2 s[62:63], s[4:5], 0x0 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] -; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[72:73] +; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS0-NEXT: s_mov_b32 s12, s70 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s69 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s68 +; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s79 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s78 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[78:79] -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[62:63] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[46:47], 0, 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] -; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[72:73] +; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS0-NEXT: s_mov_b32 s12, s70 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s69 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s68 +; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s79 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s78 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[56:57], off -; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[78:79] -; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[64:65] +; GLOBALNESS0-NEXT: global_store_dwordx2 v[46:47], v[44:45], off +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[62:63] +; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[96:97] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_14 ; GLOBALNESS0-NEXT: ; %bb.23: ; %bb62.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off +; GLOBALNESS0-NEXT: global_store_dwordx2 v[46:47], v[42:43], off ; GLOBALNESS0-NEXT: s_branch .LBB1_14 -; GLOBALNESS0-NEXT: .LBB1_24: ; %Flow23 +; GLOBALNESS0-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1 +; GLOBALNESS0-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GLOBALNESS0-NEXT: s_branch .LBB1_3 +; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow23 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: v_readlane_b32 s62, v56, 8 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow24 +; GLOBALNESS0-NEXT: s_mov_b32 s99, s83 +; GLOBALNESS0-NEXT: v_readlane_b32 s63, v56, 9 +; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow24 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[76:77] -; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[62:63] +; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[52:53] +; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[94:95] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_2 -; GLOBALNESS0-NEXT: ; %bb.26: ; %bb67.i +; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[58:59] +; GLOBALNESS0-NEXT: v_readlane_b32 s6, v56, 4 +; GLOBALNESS0-NEXT: v_readlane_b32 s7, v56, 5 +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_1 -; GLOBALNESS0-NEXT: ; %bb.27: ; %bb69.i +; GLOBALNESS0-NEXT: ; %bb.28: ; %bb69.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 +; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[42:43], off ; GLOBALNESS0-NEXT: s_branch .LBB1_1 -; GLOBALNESS0-NEXT: .LBB1_28: ; %bb73.i +; GLOBALNESS0-NEXT: .LBB1_29: ; %bb73.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 +; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[42:43], off ; GLOBALNESS0-NEXT: s_branch .LBB1_2 -; GLOBALNESS0-NEXT: .LBB1_29: ; %loop.exit.guard +; GLOBALNESS0-NEXT: .LBB1_30: ; %loop.exit.guard ; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1 -; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_31 -; GLOBALNESS0-NEXT: ; %bb.30: ; %bb7.i.i -; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 -; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_32 +; GLOBALNESS0-NEXT: ; %bb.31: ; %bb7.i.i +; GLOBALNESS0-NEXT: s_add_u32 s8, s46, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s47, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17] ; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS0-NEXT: s_mov_b32 s12, s70 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s69 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s68 +; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s79 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s78 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0 -; GLOBALNESS0-NEXT: .LBB1_31: ; %Flow +; GLOBALNESS0-NEXT: .LBB1_32: ; %Flow ; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] -; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_33 -; GLOBALNESS0-NEXT: ; %bb.32: ; %bb11.i.i -; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 -; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_34 +; GLOBALNESS0-NEXT: ; %bb.33: ; %bb11.i.i +; GLOBALNESS0-NEXT: s_add_u32 s8, s46, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s47, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17] ; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 -; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[40:41] +; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] -; GLOBALNESS0-NEXT: s_mov_b32 s12, s70 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s69 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s68 +; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s79 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s78 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GLOBALNESS0-NEXT: .LBB1_33: ; %UnifiedUnreachableBlock +; GLOBALNESS0-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock bb: store i32 0, ptr addrspace(1) null, align 4 %tmp4 = load i32, ptr addrspace(1) %arg1.global, align 4 br label %bb5 bb5: ; preds = %bb5.backedge, %bb - %tmp4.i.sroa.0.0 = phi <9 x double> [ poison, %bb ], [ %tmp4.i.sroa.0.1, %bb5.backedge ] + %tmp4.i.sroa.0.0 = phi <9 x double> [ undef, %bb ], [ %tmp4.i.sroa.0.1, %bb5.backedge ] %tmp14.1.i = load i32, ptr inttoptr (i64 128 to ptr), align 128 store i32 0, ptr addrspace(5) null, align 4 %tmp14.2.i = load i32, ptr inttoptr (i64 128 to ptr), align 128 diff --git a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir index b80c478c3761f..edb1f74d738f5 100644 --- a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir +++ b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir @@ -27,11 +27,11 @@ body: | ; CHECK-NEXT: renamable $sgpr4 = COPY $sgpr0 ; CHECK-NEXT: SI_SPILL_S128_SAVE $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: renamable $sgpr5 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr76 = COPY renamable $sgpr5 - ; CHECK-NEXT: renamable $sgpr77 = COPY renamable $sgpr5 - ; CHECK-NEXT: renamable $sgpr78 = COPY renamable $sgpr5 + ; CHECK-NEXT: renamable $sgpr88 = COPY renamable $sgpr5 + ; CHECK-NEXT: renamable $sgpr89 = COPY renamable $sgpr5 + ; CHECK-NEXT: renamable $sgpr90 = COPY renamable $sgpr5 ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1056964608 - ; CHECK-NEXT: renamable $sgpr79 = COPY renamable $sgpr5 + ; CHECK-NEXT: renamable $sgpr91 = COPY renamable $sgpr5 ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0 ; CHECK-NEXT: renamable $sgpr8 = COPY renamable $sgpr5 ; CHECK-NEXT: renamable $sgpr9 = COPY renamable $sgpr5 @@ -43,16 +43,16 @@ body: | ; CHECK-NEXT: renamable $sgpr15 = COPY renamable $sgpr5 ; CHECK-NEXT: renamable $vgpr5_vgpr6 = COPY killed renamable $sgpr0_sgpr1 ; CHECK-NEXT: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1088, 0 :: (dereferenceable load (s256), addrspace 6) - ; CHECK-NEXT: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6) + ; CHECK-NEXT: renamable $sgpr76_sgpr77_sgpr78_sgpr79 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6) ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1200 ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5 ; CHECK-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0 :: (dereferenceable load (s256), addrspace 6) - ; CHECK-NEXT: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) + ; CHECK-NEXT: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) ; CHECK-NEXT: KILL killed renamable $sgpr0, renamable $sgpr1 ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1264 ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5 ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0 :: (dereferenceable load (s256), addrspace 6) - ; CHECK-NEXT: renamable $sgpr88_sgpr89_sgpr90_sgpr91 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) + ; CHECK-NEXT: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1328 ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5 ; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0 :: (dereferenceable load (s256), addrspace 6) @@ -68,10 +68,10 @@ body: | ; CHECK-NEXT: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0 :: (load (s128), addrspace 6) ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6) ; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) - ; CHECK-NEXT: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) - ; CHECK-NEXT: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) - ; CHECK-NEXT: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) - ; CHECK-NEXT: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) ; CHECK-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) ; CHECK-NEXT: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) ; CHECK-NEXT: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll index d9df80ce6c1c0..9afa0e2bb2dcd 100644 --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -20,16 +20,16 @@ define hidden void @widget() { ; GCN-NEXT: v_writelane_b32 v41, s35, 3 ; GCN-NEXT: v_writelane_b32 v41, s36, 4 ; GCN-NEXT: v_writelane_b32 v41, s37, 5 -; GCN-NEXT: v_writelane_b32 v41, s38, 6 -; GCN-NEXT: v_writelane_b32 v41, s39, 7 -; GCN-NEXT: v_writelane_b32 v41, s40, 8 -; GCN-NEXT: v_writelane_b32 v41, s41, 9 -; GCN-NEXT: v_writelane_b32 v41, s42, 10 -; GCN-NEXT: v_writelane_b32 v41, s43, 11 -; GCN-NEXT: v_writelane_b32 v41, s44, 12 -; GCN-NEXT: v_writelane_b32 v41, s45, 13 -; GCN-NEXT: v_writelane_b32 v41, s46, 14 -; GCN-NEXT: v_writelane_b32 v41, s47, 15 +; GCN-NEXT: v_writelane_b32 v41, s46, 6 +; GCN-NEXT: v_writelane_b32 v41, s47, 7 +; GCN-NEXT: v_writelane_b32 v41, s48, 8 +; GCN-NEXT: v_writelane_b32 v41, s49, 9 +; GCN-NEXT: v_writelane_b32 v41, s50, 10 +; GCN-NEXT: v_writelane_b32 v41, s51, 11 +; GCN-NEXT: v_writelane_b32 v41, s52, 12 +; GCN-NEXT: v_writelane_b32 v41, s53, 13 +; GCN-NEXT: v_writelane_b32 v41, s62, 14 +; GCN-NEXT: v_writelane_b32 v41, s63, 15 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_load_dword v0, v[0:1] @@ -37,7 +37,7 @@ define hidden void @widget() { ; GCN-NEXT: s_mov_b64 s[16:17], 0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0 -; GCN-NEXT: s_mov_b64 s[46:47], 0 +; GCN-NEXT: s_mov_b64 s[62:63], 0 ; GCN-NEXT: s_mov_b64 s[18:19], 0 ; GCN-NEXT: s_cbranch_vccz .LBB0_9 ; GCN-NEXT: ; %bb.1: ; %Flow @@ -52,30 +52,30 @@ define hidden void @widget() { ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 ; GCN-NEXT: s_mov_b64 s[34:35], s[4:5] ; GCN-NEXT: s_mov_b64 s[36:37], s[6:7] -; GCN-NEXT: s_mov_b64 s[38:39], s[8:9] -; GCN-NEXT: s_mov_b64 s[40:41], s[10:11] -; GCN-NEXT: s_mov_b32 s42, s12 -; GCN-NEXT: s_mov_b32 s43, s13 -; GCN-NEXT: s_mov_b32 s44, s14 -; GCN-NEXT: s_mov_b32 s45, s15 +; GCN-NEXT: s_mov_b64 s[46:47], s[8:9] +; GCN-NEXT: s_mov_b64 s[48:49], s[10:11] +; GCN-NEXT: s_mov_b32 s50, s12 +; GCN-NEXT: s_mov_b32 s51, s13 +; GCN-NEXT: s_mov_b32 s52, s14 +; GCN-NEXT: s_mov_b32 s53, s15 ; GCN-NEXT: v_mov_b32_e32 v40, v31 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: v_mov_b32_e32 v31, v40 -; GCN-NEXT: s_mov_b32 s12, s42 -; GCN-NEXT: s_mov_b32 s13, s43 -; GCN-NEXT: s_mov_b32 s14, s44 -; GCN-NEXT: s_mov_b32 s15, s45 +; GCN-NEXT: s_mov_b32 s12, s50 +; GCN-NEXT: s_mov_b32 s13, s51 +; GCN-NEXT: s_mov_b32 s14, s52 +; GCN-NEXT: s_mov_b32 s15, s53 ; GCN-NEXT: s_mov_b64 s[4:5], s[34:35] ; GCN-NEXT: s_mov_b64 s[6:7], s[36:37] -; GCN-NEXT: s_mov_b64 s[8:9], s[38:39] -; GCN-NEXT: s_mov_b64 s[10:11], s[40:41] +; GCN-NEXT: s_mov_b64 s[8:9], s[46:47] +; GCN-NEXT: s_mov_b64 s[10:11], s[48:49] ; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 ; GCN-NEXT: s_mov_b64 s[16:17], 0 -; GCN-NEXT: s_andn2_b64 s[18:19], s[46:47], exec +; GCN-NEXT: s_andn2_b64 s[18:19], s[62:63], exec ; GCN-NEXT: s_and_b64 s[20:21], vcc, exec -; GCN-NEXT: s_or_b64 s[46:47], s[18:19], s[20:21] +; GCN-NEXT: s_or_b64 s[62:63], s[18:19], s[20:21] ; GCN-NEXT: .LBB0_4: ; %Flow2 -; GCN-NEXT: s_and_saveexec_b64 s[18:19], s[46:47] +; GCN-NEXT: s_and_saveexec_b64 s[18:19], s[62:63] ; GCN-NEXT: s_xor_b64 s[18:19], exec, s[18:19] ; GCN-NEXT: s_cbranch_execz .LBB0_6 ; GCN-NEXT: ; %bb.5: ; %bb12 @@ -93,16 +93,16 @@ define hidden void @widget() { ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: .LBB0_8: ; %UnifiedReturnBlock -; GCN-NEXT: v_readlane_b32 s47, v41, 15 -; GCN-NEXT: v_readlane_b32 s46, v41, 14 -; GCN-NEXT: v_readlane_b32 s45, v41, 13 -; GCN-NEXT: v_readlane_b32 s44, v41, 12 -; GCN-NEXT: v_readlane_b32 s43, v41, 11 -; GCN-NEXT: v_readlane_b32 s42, v41, 10 -; GCN-NEXT: v_readlane_b32 s41, v41, 9 -; GCN-NEXT: v_readlane_b32 s40, v41, 8 -; GCN-NEXT: v_readlane_b32 s39, v41, 7 -; GCN-NEXT: v_readlane_b32 s38, v41, 6 +; GCN-NEXT: v_readlane_b32 s63, v41, 15 +; GCN-NEXT: v_readlane_b32 s62, v41, 14 +; GCN-NEXT: v_readlane_b32 s53, v41, 13 +; GCN-NEXT: v_readlane_b32 s52, v41, 12 +; GCN-NEXT: v_readlane_b32 s51, v41, 11 +; GCN-NEXT: v_readlane_b32 s50, v41, 10 +; GCN-NEXT: v_readlane_b32 s49, v41, 9 +; GCN-NEXT: v_readlane_b32 s48, v41, 8 +; GCN-NEXT: v_readlane_b32 s47, v41, 7 +; GCN-NEXT: v_readlane_b32 s46, v41, 6 ; GCN-NEXT: v_readlane_b32 s37, v41, 5 ; GCN-NEXT: v_readlane_b32 s36, v41, 4 ; GCN-NEXT: v_readlane_b32 s35, v41, 3 @@ -119,7 +119,7 @@ define hidden void @widget() { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .LBB0_9: ; %bb2 -; GCN-NEXT: v_cmp_eq_u32_e64 s[46:47], 21, v0 +; GCN-NEXT: v_cmp_eq_u32_e64 s[62:63], 21, v0 ; GCN-NEXT: v_cmp_ne_u32_e64 s[18:19], 21, v0 ; GCN-NEXT: s_mov_b64 vcc, exec ; GCN-NEXT: s_cbranch_execnz .LBB0_2 @@ -272,53 +272,53 @@ define hidden void @blam() { ; GCN-NEXT: v_writelane_b32 v45, s35, 3 ; GCN-NEXT: v_writelane_b32 v45, s36, 4 ; GCN-NEXT: v_writelane_b32 v45, s37, 5 -; GCN-NEXT: v_writelane_b32 v45, s38, 6 -; GCN-NEXT: v_writelane_b32 v45, s39, 7 -; GCN-NEXT: v_writelane_b32 v45, s40, 8 -; GCN-NEXT: v_writelane_b32 v45, s41, 9 -; GCN-NEXT: v_writelane_b32 v45, s42, 10 -; GCN-NEXT: v_writelane_b32 v45, s43, 11 -; GCN-NEXT: v_writelane_b32 v45, s44, 12 -; GCN-NEXT: v_writelane_b32 v45, s45, 13 -; GCN-NEXT: v_writelane_b32 v45, s46, 14 -; GCN-NEXT: v_writelane_b32 v45, s47, 15 -; GCN-NEXT: v_writelane_b32 v45, s48, 16 -; GCN-NEXT: v_writelane_b32 v45, s49, 17 -; GCN-NEXT: v_writelane_b32 v45, s50, 18 -; GCN-NEXT: v_writelane_b32 v45, s51, 19 -; GCN-NEXT: v_writelane_b32 v45, s52, 20 -; GCN-NEXT: v_writelane_b32 v45, s53, 21 -; GCN-NEXT: v_writelane_b32 v45, s54, 22 -; GCN-NEXT: v_writelane_b32 v45, s55, 23 -; GCN-NEXT: v_writelane_b32 v45, s56, 24 -; GCN-NEXT: v_writelane_b32 v45, s57, 25 +; GCN-NEXT: v_writelane_b32 v45, s46, 6 +; GCN-NEXT: v_writelane_b32 v45, s47, 7 +; GCN-NEXT: v_writelane_b32 v45, s48, 8 +; GCN-NEXT: v_writelane_b32 v45, s49, 9 +; GCN-NEXT: v_writelane_b32 v45, s50, 10 +; GCN-NEXT: v_writelane_b32 v45, s51, 11 +; GCN-NEXT: v_writelane_b32 v45, s52, 12 +; GCN-NEXT: v_writelane_b32 v45, s53, 13 +; GCN-NEXT: v_writelane_b32 v45, s62, 14 +; GCN-NEXT: v_writelane_b32 v45, s63, 15 +; GCN-NEXT: v_writelane_b32 v45, s64, 16 +; GCN-NEXT: v_writelane_b32 v45, s65, 17 +; GCN-NEXT: v_writelane_b32 v45, s66, 18 +; GCN-NEXT: v_writelane_b32 v45, s67, 19 +; GCN-NEXT: v_writelane_b32 v45, s68, 20 +; GCN-NEXT: v_writelane_b32 v45, s69, 21 +; GCN-NEXT: v_writelane_b32 v45, s78, 22 +; GCN-NEXT: v_writelane_b32 v45, s79, 23 +; GCN-NEXT: v_writelane_b32 v45, s80, 24 +; GCN-NEXT: v_writelane_b32 v45, s81, 25 ; GCN-NEXT: v_mov_b32_e32 v40, v31 -; GCN-NEXT: s_mov_b32 s46, s15 -; GCN-NEXT: s_mov_b32 s47, s14 -; GCN-NEXT: s_mov_b32 s48, s13 -; GCN-NEXT: s_mov_b32 s49, s12 +; GCN-NEXT: s_mov_b32 s62, s15 +; GCN-NEXT: s_mov_b32 s63, s14 +; GCN-NEXT: s_mov_b32 s64, s13 +; GCN-NEXT: s_mov_b32 s65, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] +; GCN-NEXT: s_mov_b64 s[46:47], s[6:7] +; GCN-NEXT: s_mov_b64 s[48:49], s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: v_and_b32_e32 v2, 0x3ff, v40 ; GCN-NEXT: flat_load_dword v43, v[0:1] ; GCN-NEXT: v_mov_b32_e32 v42, 0 -; GCN-NEXT: s_mov_b64 s[50:51], 0 +; GCN-NEXT: s_mov_b64 s[66:67], 0 ; GCN-NEXT: v_lshlrev_b32_e32 v41, 2, v2 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: v_cmp_eq_f32_e64 s[52:53], 0, v43 -; GCN-NEXT: v_cmp_neq_f32_e64 s[42:43], 0, v43 +; GCN-NEXT: v_cmp_eq_f32_e64 s[68:69], 0, v43 +; GCN-NEXT: v_cmp_neq_f32_e64 s[50:51], 0, v43 ; GCN-NEXT: v_mov_b32_e32 v44, 0x7fc00000 ; GCN-NEXT: s_branch .LBB1_2 ; GCN-NEXT: .LBB1_1: ; %Flow7 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[8:9] ; GCN-NEXT: s_and_b64 s[4:5], exec, s[4:5] -; GCN-NEXT: s_or_b64 s[50:51], s[4:5], s[50:51] -; GCN-NEXT: s_andn2_b64 exec, exec, s[50:51] +; GCN-NEXT: s_or_b64 s[66:67], s[4:5], s[66:67] +; GCN-NEXT: s_andn2_b64 exec, exec, s[66:67] ; GCN-NEXT: s_cbranch_execz .LBB1_18 ; GCN-NEXT: .LBB1_2: ; %bb2 ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -329,26 +329,26 @@ define hidden void @blam() { ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 2, v0 ; GCN-NEXT: s_mov_b64 s[4:5], -1 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_xor_b64 s[54:55], exec, s[8:9] +; GCN-NEXT: s_xor_b64 s[78:79], exec, s[8:9] ; GCN-NEXT: s_cbranch_execz .LBB1_12 ; GCN-NEXT: ; %bb.3: ; %bb6 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 -; GCN-NEXT: v_cmp_eq_u32_e64 s[44:45], 3, v0 -; GCN-NEXT: s_and_saveexec_b64 s[56:57], s[44:45] +; GCN-NEXT: v_cmp_eq_u32_e64 s[52:53], 3, v0 +; GCN-NEXT: s_and_saveexec_b64 s[80:81], s[52:53] ; GCN-NEXT: s_cbranch_execz .LBB1_11 ; GCN-NEXT: ; %bb.4: ; %bb11 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, spam@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, spam@rel32@hi+12 -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] +; GCN-NEXT: s_mov_b64 s[4:5], s[48:49] +; GCN-NEXT: s_mov_b64 s[6:7], s[46:47] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s49 -; GCN-NEXT: s_mov_b32 s13, s48 -; GCN-NEXT: s_mov_b32 s14, s47 -; GCN-NEXT: s_mov_b32 s15, s46 +; GCN-NEXT: s_mov_b32 s12, s65 +; GCN-NEXT: s_mov_b32 s13, s64 +; GCN-NEXT: s_mov_b32 s14, s63 +; GCN-NEXT: s_mov_b32 s15, s62 ; GCN-NEXT: v_mov_b32_e32 v31, v40 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 @@ -357,13 +357,13 @@ define hidden void @blam() { ; GCN-NEXT: s_cbranch_execz .LBB1_10 ; GCN-NEXT: ; %bb.5: ; %bb14 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 -; GCN-NEXT: s_mov_b64 s[8:9], s[52:53] -; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[42:43] +; GCN-NEXT: s_mov_b64 s[8:9], s[68:69] +; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[50:51] ; GCN-NEXT: s_cbranch_execz .LBB1_7 ; GCN-NEXT: ; %bb.6: ; %bb16 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], 0 -; GCN-NEXT: s_or_b64 s[8:9], s[52:53], exec +; GCN-NEXT: s_or_b64 s[8:9], s[68:69], exec ; GCN-NEXT: .LBB1_7: ; %Flow3 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[6:7] @@ -382,19 +382,19 @@ define hidden void @blam() { ; GCN-NEXT: .LBB1_10: ; %Flow2 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] -; GCN-NEXT: s_andn2_b64 s[4:5], s[44:45], exec +; GCN-NEXT: s_andn2_b64 s[4:5], s[52:53], exec ; GCN-NEXT: s_and_b64 s[8:9], vcc, exec -; GCN-NEXT: s_or_b64 s[44:45], s[4:5], s[8:9] +; GCN-NEXT: s_or_b64 s[52:53], s[4:5], s[8:9] ; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec ; GCN-NEXT: .LBB1_11: ; %Flow1 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 -; GCN-NEXT: s_or_b64 exec, exec, s[56:57] -; GCN-NEXT: s_orn2_b64 s[4:5], s[44:45], exec +; GCN-NEXT: s_or_b64 exec, exec, s[80:81] +; GCN-NEXT: s_orn2_b64 s[4:5], s[52:53], exec ; GCN-NEXT: s_and_b64 s[6:7], s[6:7], exec ; GCN-NEXT: ; implicit-def: $vgpr0 ; GCN-NEXT: .LBB1_12: ; %Flow ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 -; GCN-NEXT: s_andn2_saveexec_b64 s[8:9], s[54:55] +; GCN-NEXT: s_andn2_saveexec_b64 s[8:9], s[78:79] ; GCN-NEXT: s_cbranch_execz .LBB1_16 ; GCN-NEXT: ; %bb.13: ; %bb8 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 @@ -426,27 +426,27 @@ define hidden void @blam() { ; GCN-NEXT: s_andn2_b64 s[4:5], s[4:5], exec ; GCN-NEXT: s_branch .LBB1_1 ; GCN-NEXT: .LBB1_18: ; %DummyReturnBlock -; GCN-NEXT: s_or_b64 exec, exec, s[50:51] -; GCN-NEXT: v_readlane_b32 s57, v45, 25 -; GCN-NEXT: v_readlane_b32 s56, v45, 24 -; GCN-NEXT: v_readlane_b32 s55, v45, 23 -; GCN-NEXT: v_readlane_b32 s54, v45, 22 -; GCN-NEXT: v_readlane_b32 s53, v45, 21 -; GCN-NEXT: v_readlane_b32 s52, v45, 20 -; GCN-NEXT: v_readlane_b32 s51, v45, 19 -; GCN-NEXT: v_readlane_b32 s50, v45, 18 -; GCN-NEXT: v_readlane_b32 s49, v45, 17 -; GCN-NEXT: v_readlane_b32 s48, v45, 16 -; GCN-NEXT: v_readlane_b32 s47, v45, 15 -; GCN-NEXT: v_readlane_b32 s46, v45, 14 -; GCN-NEXT: v_readlane_b32 s45, v45, 13 -; GCN-NEXT: v_readlane_b32 s44, v45, 12 -; GCN-NEXT: v_readlane_b32 s43, v45, 11 -; GCN-NEXT: v_readlane_b32 s42, v45, 10 -; GCN-NEXT: v_readlane_b32 s41, v45, 9 -; GCN-NEXT: v_readlane_b32 s40, v45, 8 -; GCN-NEXT: v_readlane_b32 s39, v45, 7 -; GCN-NEXT: v_readlane_b32 s38, v45, 6 +; GCN-NEXT: s_or_b64 exec, exec, s[66:67] +; GCN-NEXT: v_readlane_b32 s81, v45, 25 +; GCN-NEXT: v_readlane_b32 s80, v45, 24 +; GCN-NEXT: v_readlane_b32 s79, v45, 23 +; GCN-NEXT: v_readlane_b32 s78, v45, 22 +; GCN-NEXT: v_readlane_b32 s69, v45, 21 +; GCN-NEXT: v_readlane_b32 s68, v45, 20 +; GCN-NEXT: v_readlane_b32 s67, v45, 19 +; GCN-NEXT: v_readlane_b32 s66, v45, 18 +; GCN-NEXT: v_readlane_b32 s65, v45, 17 +; GCN-NEXT: v_readlane_b32 s64, v45, 16 +; GCN-NEXT: v_readlane_b32 s63, v45, 15 +; GCN-NEXT: v_readlane_b32 s62, v45, 14 +; GCN-NEXT: v_readlane_b32 s53, v45, 13 +; GCN-NEXT: v_readlane_b32 s52, v45, 12 +; GCN-NEXT: v_readlane_b32 s51, v45, 11 +; GCN-NEXT: v_readlane_b32 s50, v45, 10 +; GCN-NEXT: v_readlane_b32 s49, v45, 9 +; GCN-NEXT: v_readlane_b32 s48, v45, 8 +; GCN-NEXT: v_readlane_b32 s47, v45, 7 +; GCN-NEXT: v_readlane_b32 s46, v45, 6 ; GCN-NEXT: v_readlane_b32 s37, v45, 5 ; GCN-NEXT: v_readlane_b32 s36, v45, 4 ; GCN-NEXT: v_readlane_b32 s35, v45, 3 diff --git a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir index 8a0bf26f81d22..670b7d7b8893b 100644 --- a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir +++ b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir @@ -37,100 +37,74 @@ body: | ; MUBUF-LABEL: name: use_restore_frame_reg ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $vgpr1, $vgpr2 + ; MUBUF-NEXT: liveins: $sgpr38, $sgpr39, $vgpr1 ; MUBUF-NEXT: {{ $}} - ; MUBUF-NEXT: $sgpr4 = COPY $sgpr33 + ; MUBUF-NEXT: $sgpr38 = frame-setup COPY $sgpr33 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc - ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 9961728, implicit-def dead $scc - ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.20, addrspace 5) - ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; MUBUF-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2 - ; MUBUF-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2 + ; MUBUF-NEXT: $sgpr39 = frame-setup COPY $sgpr34 ; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 11010048, implicit-def dead $scc ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec ; MUBUF-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec - ; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 155648, killed $vgpr3, implicit $exec - ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 + ; MUBUF-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; MUBUF-NEXT: $vgpr2 = V_ADD_U32_e32 155648, killed $vgpr2, implicit $exec + ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; MUBUF-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc ; MUBUF-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr2 + ; MUBUF-NEXT: liveins: $sgpr38, $sgpr39 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.2: - ; MUBUF-NEXT: liveins: $vgpr2 + ; MUBUF-NEXT: liveins: $sgpr38, $sgpr39 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; MUBUF-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0 - ; MUBUF-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1 - ; MUBUF-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; MUBUF-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 9961728, implicit-def dead $scc - ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.20, addrspace 5) - ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; MUBUF-NEXT: $sgpr33 = COPY $sgpr4 + ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr39 + ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr38 ; MUBUF-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: use_restore_frame_reg ; FLATSCR: bb.0: ; FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; FLATSCR-NEXT: liveins: $vgpr1, $vgpr2 + ; FLATSCR-NEXT: liveins: $sgpr38, $sgpr39, $vgpr1 ; FLATSCR-NEXT: {{ $}} - ; FLATSCR-NEXT: $sgpr4 = COPY $sgpr33 + ; FLATSCR-NEXT: $sgpr38 = frame-setup COPY $sgpr33 ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc - ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 155652, implicit-def dead $scc - ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.20, addrspace 5) - ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2 - ; FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2 + ; FLATSCR-NEXT: $sgpr39 = frame-setup COPY $sgpr34 ; FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 172032, implicit-def dead $scc ; FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc - ; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, 8192, implicit-def $scc, implicit $scc - ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc - ; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33 - ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec - ; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, -8192, implicit-def $scc, implicit $scc - ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc - ; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33 - ; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, 155648, implicit-def $scc, implicit $scc - ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc - ; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33 - ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 - ; FLATSCR-NEXT: $sgpr33 = S_ADDC_U32 $sgpr33, -155648, implicit-def $scc, implicit $scc - ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr33, 0, implicit-def $scc - ; FLATSCR-NEXT: $sgpr33 = S_BITSET0_B32 0, $sgpr33 + ; FLATSCR-NEXT: $sgpr40 = S_ADDC_U32 $sgpr33, 8192, implicit-def $scc, implicit $scc + ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr40, 0, implicit-def $scc + ; FLATSCR-NEXT: $sgpr40 = S_BITSET0_B32 0, $sgpr40 + ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr40, implicit $exec + ; FLATSCR-NEXT: $sgpr40 = S_ADDC_U32 $sgpr33, 155648, implicit-def $scc, implicit $scc + ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr40, 0, implicit-def $scc + ; FLATSCR-NEXT: $sgpr40 = S_BITSET0_B32 0, $sgpr40 + ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr40, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; FLATSCR-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc ; FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; FLATSCR-NEXT: {{ $}} ; FLATSCR-NEXT: bb.1: ; FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; FLATSCR-NEXT: liveins: $vgpr2 + ; FLATSCR-NEXT: liveins: $sgpr38, $sgpr39 ; FLATSCR-NEXT: {{ $}} ; FLATSCR-NEXT: S_NOP 0 ; FLATSCR-NEXT: {{ $}} ; FLATSCR-NEXT: bb.2: - ; FLATSCR-NEXT: liveins: $vgpr2 + ; FLATSCR-NEXT: liveins: $sgpr38, $sgpr39 ; FLATSCR-NEXT: {{ $}} ; FLATSCR-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; FLATSCR-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0 - ; FLATSCR-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1 - ; FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 155652, implicit-def dead $scc - ; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.20, addrspace 5) - ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; FLATSCR-NEXT: $sgpr33 = COPY $sgpr4 + ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr39 + ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr38 ; FLATSCR-NEXT: S_ENDPGM 0 bb.0: liveins: $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll b/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll index 5ced02f28c977..d0798b261abf0 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll @@ -33,36 +33,20 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX900-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX900-NEXT: v_writelane_b32 v63, s36, 0 ; GFX900-NEXT: v_writelane_b32 v63, s37, 1 -; GFX900-NEXT: v_writelane_b32 v63, s38, 2 -; GFX900-NEXT: v_writelane_b32 v63, s39, 3 -; GFX900-NEXT: v_writelane_b32 v63, s40, 4 -; GFX900-NEXT: v_writelane_b32 v63, s41, 5 -; GFX900-NEXT: v_writelane_b32 v63, s42, 6 -; GFX900-NEXT: v_writelane_b32 v63, s43, 7 -; GFX900-NEXT: v_writelane_b32 v63, s44, 8 -; GFX900-NEXT: v_writelane_b32 v63, s45, 9 -; GFX900-NEXT: v_writelane_b32 v63, s46, 10 -; GFX900-NEXT: v_writelane_b32 v63, s47, 11 -; GFX900-NEXT: v_writelane_b32 v63, s48, 12 -; GFX900-NEXT: v_writelane_b32 v63, s49, 13 -; GFX900-NEXT: v_writelane_b32 v63, s50, 14 -; GFX900-NEXT: v_writelane_b32 v63, s51, 15 -; GFX900-NEXT: v_writelane_b32 v63, s52, 16 -; GFX900-NEXT: v_writelane_b32 v63, s53, 17 -; GFX900-NEXT: v_writelane_b32 v63, s54, 18 -; GFX900-NEXT: v_writelane_b32 v63, s55, 19 -; GFX900-NEXT: v_writelane_b32 v63, s56, 20 -; GFX900-NEXT: v_writelane_b32 v63, s57, 21 -; GFX900-NEXT: v_writelane_b32 v63, s58, 22 -; GFX900-NEXT: v_writelane_b32 v63, s59, 23 -; GFX900-NEXT: v_writelane_b32 v63, s60, 24 -; GFX900-NEXT: v_writelane_b32 v63, s61, 25 -; GFX900-NEXT: v_writelane_b32 v63, s62, 26 -; GFX900-NEXT: v_writelane_b32 v63, s63, 27 -; GFX900-NEXT: v_writelane_b32 v63, s64, 28 -; GFX900-NEXT: v_writelane_b32 v63, s65, 29 -; GFX900-NEXT: v_writelane_b32 v63, s66, 30 -; GFX900-NEXT: v_writelane_b32 v63, s67, 31 +; GFX900-NEXT: v_writelane_b32 v63, s46, 2 +; GFX900-NEXT: v_writelane_b32 v63, s47, 3 +; GFX900-NEXT: v_writelane_b32 v63, s48, 4 +; GFX900-NEXT: v_writelane_b32 v63, s49, 5 +; GFX900-NEXT: v_writelane_b32 v63, s50, 6 +; GFX900-NEXT: v_writelane_b32 v63, s51, 7 +; GFX900-NEXT: v_writelane_b32 v63, s52, 8 +; GFX900-NEXT: v_writelane_b32 v63, s53, 9 +; GFX900-NEXT: v_writelane_b32 v63, s62, 10 +; GFX900-NEXT: v_writelane_b32 v63, s63, 11 +; GFX900-NEXT: v_writelane_b32 v63, s64, 12 +; GFX900-NEXT: v_writelane_b32 v63, s65, 13 +; GFX900-NEXT: v_writelane_b32 v63, s66, 14 +; GFX900-NEXT: v_writelane_b32 v63, s67, 15 ; GFX900-NEXT: v_mov_b32_e32 v33, v30 ; GFX900-NEXT: v_mov_b32_e32 v34, v29 ; GFX900-NEXT: v_mov_b32_e32 v35, v28 @@ -160,36 +144,20 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX900-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec ; GFX900-NEXT: ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 ; GFX900-NEXT: v_mov_b32_e32 v0, 0 -; GFX900-NEXT: v_readlane_b32 s67, v63, 31 -; GFX900-NEXT: v_readlane_b32 s66, v63, 30 -; GFX900-NEXT: v_readlane_b32 s65, v63, 29 -; GFX900-NEXT: v_readlane_b32 s64, v63, 28 -; GFX900-NEXT: v_readlane_b32 s63, v63, 27 -; GFX900-NEXT: v_readlane_b32 s62, v63, 26 -; GFX900-NEXT: v_readlane_b32 s61, v63, 25 -; GFX900-NEXT: v_readlane_b32 s60, v63, 24 -; GFX900-NEXT: v_readlane_b32 s59, v63, 23 -; GFX900-NEXT: v_readlane_b32 s58, v63, 22 -; GFX900-NEXT: v_readlane_b32 s57, v63, 21 -; GFX900-NEXT: v_readlane_b32 s56, v63, 20 -; GFX900-NEXT: v_readlane_b32 s55, v63, 19 -; GFX900-NEXT: v_readlane_b32 s54, v63, 18 -; GFX900-NEXT: v_readlane_b32 s53, v63, 17 -; GFX900-NEXT: v_readlane_b32 s52, v63, 16 -; GFX900-NEXT: v_readlane_b32 s51, v63, 15 -; GFX900-NEXT: v_readlane_b32 s50, v63, 14 -; GFX900-NEXT: v_readlane_b32 s49, v63, 13 -; GFX900-NEXT: v_readlane_b32 s48, v63, 12 -; GFX900-NEXT: v_readlane_b32 s47, v63, 11 -; GFX900-NEXT: v_readlane_b32 s46, v63, 10 -; GFX900-NEXT: v_readlane_b32 s45, v63, 9 -; GFX900-NEXT: v_readlane_b32 s44, v63, 8 -; GFX900-NEXT: v_readlane_b32 s43, v63, 7 -; GFX900-NEXT: v_readlane_b32 s42, v63, 6 -; GFX900-NEXT: v_readlane_b32 s41, v63, 5 -; GFX900-NEXT: v_readlane_b32 s40, v63, 4 -; GFX900-NEXT: v_readlane_b32 s39, v63, 3 -; GFX900-NEXT: v_readlane_b32 s38, v63, 2 +; GFX900-NEXT: v_readlane_b32 s67, v63, 15 +; GFX900-NEXT: v_readlane_b32 s66, v63, 14 +; GFX900-NEXT: v_readlane_b32 s65, v63, 13 +; GFX900-NEXT: v_readlane_b32 s64, v63, 12 +; GFX900-NEXT: v_readlane_b32 s63, v63, 11 +; GFX900-NEXT: v_readlane_b32 s62, v63, 10 +; GFX900-NEXT: v_readlane_b32 s53, v63, 9 +; GFX900-NEXT: v_readlane_b32 s52, v63, 8 +; GFX900-NEXT: v_readlane_b32 s51, v63, 7 +; GFX900-NEXT: v_readlane_b32 s50, v63, 6 +; GFX900-NEXT: v_readlane_b32 s49, v63, 5 +; GFX900-NEXT: v_readlane_b32 s48, v63, 4 +; GFX900-NEXT: v_readlane_b32 s47, v63, 3 +; GFX900-NEXT: v_readlane_b32 s46, v63, 2 ; GFX900-NEXT: v_readlane_b32 s37, v63, 1 ; GFX900-NEXT: v_readlane_b32 s36, v63, 0 ; GFX900-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -236,36 +204,20 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX906-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX906-NEXT: v_writelane_b32 v63, s36, 0 ; GFX906-NEXT: v_writelane_b32 v63, s37, 1 -; GFX906-NEXT: v_writelane_b32 v63, s38, 2 -; GFX906-NEXT: v_writelane_b32 v63, s39, 3 -; GFX906-NEXT: v_writelane_b32 v63, s40, 4 -; GFX906-NEXT: v_writelane_b32 v63, s41, 5 -; GFX906-NEXT: v_writelane_b32 v63, s42, 6 -; GFX906-NEXT: v_writelane_b32 v63, s43, 7 -; GFX906-NEXT: v_writelane_b32 v63, s44, 8 -; GFX906-NEXT: v_writelane_b32 v63, s45, 9 -; GFX906-NEXT: v_writelane_b32 v63, s46, 10 -; GFX906-NEXT: v_writelane_b32 v63, s47, 11 -; GFX906-NEXT: v_writelane_b32 v63, s48, 12 -; GFX906-NEXT: v_writelane_b32 v63, s49, 13 -; GFX906-NEXT: v_writelane_b32 v63, s50, 14 -; GFX906-NEXT: v_writelane_b32 v63, s51, 15 -; GFX906-NEXT: v_writelane_b32 v63, s52, 16 -; GFX906-NEXT: v_writelane_b32 v63, s53, 17 -; GFX906-NEXT: v_writelane_b32 v63, s54, 18 -; GFX906-NEXT: v_writelane_b32 v63, s55, 19 -; GFX906-NEXT: v_writelane_b32 v63, s56, 20 -; GFX906-NEXT: v_writelane_b32 v63, s57, 21 -; GFX906-NEXT: v_writelane_b32 v63, s58, 22 -; GFX906-NEXT: v_writelane_b32 v63, s59, 23 -; GFX906-NEXT: v_writelane_b32 v63, s60, 24 -; GFX906-NEXT: v_writelane_b32 v63, s61, 25 -; GFX906-NEXT: v_writelane_b32 v63, s62, 26 -; GFX906-NEXT: v_writelane_b32 v63, s63, 27 -; GFX906-NEXT: v_writelane_b32 v63, s64, 28 -; GFX906-NEXT: v_writelane_b32 v63, s65, 29 -; GFX906-NEXT: v_writelane_b32 v63, s66, 30 -; GFX906-NEXT: v_writelane_b32 v63, s67, 31 +; GFX906-NEXT: v_writelane_b32 v63, s46, 2 +; GFX906-NEXT: v_writelane_b32 v63, s47, 3 +; GFX906-NEXT: v_writelane_b32 v63, s48, 4 +; GFX906-NEXT: v_writelane_b32 v63, s49, 5 +; GFX906-NEXT: v_writelane_b32 v63, s50, 6 +; GFX906-NEXT: v_writelane_b32 v63, s51, 7 +; GFX906-NEXT: v_writelane_b32 v63, s52, 8 +; GFX906-NEXT: v_writelane_b32 v63, s53, 9 +; GFX906-NEXT: v_writelane_b32 v63, s62, 10 +; GFX906-NEXT: v_writelane_b32 v63, s63, 11 +; GFX906-NEXT: v_writelane_b32 v63, s64, 12 +; GFX906-NEXT: v_writelane_b32 v63, s65, 13 +; GFX906-NEXT: v_writelane_b32 v63, s66, 14 +; GFX906-NEXT: v_writelane_b32 v63, s67, 15 ; GFX906-NEXT: v_mov_b32_e32 v33, v30 ; GFX906-NEXT: v_mov_b32_e32 v34, v29 ; GFX906-NEXT: v_mov_b32_e32 v35, v28 @@ -363,36 +315,20 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX906-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec ; GFX906-NEXT: ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 ; GFX906-NEXT: v_mov_b32_e32 v0, 0 -; GFX906-NEXT: v_readlane_b32 s67, v63, 31 -; GFX906-NEXT: v_readlane_b32 s66, v63, 30 -; GFX906-NEXT: v_readlane_b32 s65, v63, 29 -; GFX906-NEXT: v_readlane_b32 s64, v63, 28 -; GFX906-NEXT: v_readlane_b32 s63, v63, 27 -; GFX906-NEXT: v_readlane_b32 s62, v63, 26 -; GFX906-NEXT: v_readlane_b32 s61, v63, 25 -; GFX906-NEXT: v_readlane_b32 s60, v63, 24 -; GFX906-NEXT: v_readlane_b32 s59, v63, 23 -; GFX906-NEXT: v_readlane_b32 s58, v63, 22 -; GFX906-NEXT: v_readlane_b32 s57, v63, 21 -; GFX906-NEXT: v_readlane_b32 s56, v63, 20 -; GFX906-NEXT: v_readlane_b32 s55, v63, 19 -; GFX906-NEXT: v_readlane_b32 s54, v63, 18 -; GFX906-NEXT: v_readlane_b32 s53, v63, 17 -; GFX906-NEXT: v_readlane_b32 s52, v63, 16 -; GFX906-NEXT: v_readlane_b32 s51, v63, 15 -; GFX906-NEXT: v_readlane_b32 s50, v63, 14 -; GFX906-NEXT: v_readlane_b32 s49, v63, 13 -; GFX906-NEXT: v_readlane_b32 s48, v63, 12 -; GFX906-NEXT: v_readlane_b32 s47, v63, 11 -; GFX906-NEXT: v_readlane_b32 s46, v63, 10 -; GFX906-NEXT: v_readlane_b32 s45, v63, 9 -; GFX906-NEXT: v_readlane_b32 s44, v63, 8 -; GFX906-NEXT: v_readlane_b32 s43, v63, 7 -; GFX906-NEXT: v_readlane_b32 s42, v63, 6 -; GFX906-NEXT: v_readlane_b32 s41, v63, 5 -; GFX906-NEXT: v_readlane_b32 s40, v63, 4 -; GFX906-NEXT: v_readlane_b32 s39, v63, 3 -; GFX906-NEXT: v_readlane_b32 s38, v63, 2 +; GFX906-NEXT: v_readlane_b32 s67, v63, 15 +; GFX906-NEXT: v_readlane_b32 s66, v63, 14 +; GFX906-NEXT: v_readlane_b32 s65, v63, 13 +; GFX906-NEXT: v_readlane_b32 s64, v63, 12 +; GFX906-NEXT: v_readlane_b32 s63, v63, 11 +; GFX906-NEXT: v_readlane_b32 s62, v63, 10 +; GFX906-NEXT: v_readlane_b32 s53, v63, 9 +; GFX906-NEXT: v_readlane_b32 s52, v63, 8 +; GFX906-NEXT: v_readlane_b32 s51, v63, 7 +; GFX906-NEXT: v_readlane_b32 s50, v63, 6 +; GFX906-NEXT: v_readlane_b32 s49, v63, 5 +; GFX906-NEXT: v_readlane_b32 s48, v63, 4 +; GFX906-NEXT: v_readlane_b32 s47, v63, 3 +; GFX906-NEXT: v_readlane_b32 s46, v63, 2 ; GFX906-NEXT: v_readlane_b32 s37, v63, 1 ; GFX906-NEXT: v_readlane_b32 s36, v63, 0 ; GFX906-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -438,36 +374,20 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX908-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse ; GFX908-NEXT: v_writelane_b32 v62, s36, 0 ; GFX908-NEXT: v_writelane_b32 v62, s37, 1 -; GFX908-NEXT: v_writelane_b32 v62, s38, 2 -; GFX908-NEXT: v_writelane_b32 v62, s39, 3 -; GFX908-NEXT: v_writelane_b32 v62, s40, 4 -; GFX908-NEXT: v_writelane_b32 v62, s41, 5 -; GFX908-NEXT: v_writelane_b32 v62, s42, 6 -; GFX908-NEXT: v_writelane_b32 v62, s43, 7 -; GFX908-NEXT: v_writelane_b32 v62, s44, 8 -; GFX908-NEXT: v_writelane_b32 v62, s45, 9 -; GFX908-NEXT: v_writelane_b32 v62, s46, 10 -; GFX908-NEXT: v_writelane_b32 v62, s47, 11 -; GFX908-NEXT: v_writelane_b32 v62, s48, 12 -; GFX908-NEXT: v_writelane_b32 v62, s49, 13 -; GFX908-NEXT: v_writelane_b32 v62, s50, 14 -; GFX908-NEXT: v_writelane_b32 v62, s51, 15 -; GFX908-NEXT: v_writelane_b32 v62, s52, 16 -; GFX908-NEXT: v_writelane_b32 v62, s53, 17 -; GFX908-NEXT: v_writelane_b32 v62, s54, 18 -; GFX908-NEXT: v_writelane_b32 v62, s55, 19 -; GFX908-NEXT: v_writelane_b32 v62, s56, 20 -; GFX908-NEXT: v_writelane_b32 v62, s57, 21 -; GFX908-NEXT: v_writelane_b32 v62, s58, 22 -; GFX908-NEXT: v_writelane_b32 v62, s59, 23 -; GFX908-NEXT: v_writelane_b32 v62, s60, 24 -; GFX908-NEXT: v_writelane_b32 v62, s61, 25 -; GFX908-NEXT: v_writelane_b32 v62, s62, 26 -; GFX908-NEXT: v_writelane_b32 v62, s63, 27 -; GFX908-NEXT: v_writelane_b32 v62, s64, 28 -; GFX908-NEXT: v_writelane_b32 v62, s65, 29 -; GFX908-NEXT: v_writelane_b32 v62, s66, 30 -; GFX908-NEXT: v_writelane_b32 v62, s67, 31 +; GFX908-NEXT: v_writelane_b32 v62, s46, 2 +; GFX908-NEXT: v_writelane_b32 v62, s47, 3 +; GFX908-NEXT: v_writelane_b32 v62, s48, 4 +; GFX908-NEXT: v_writelane_b32 v62, s49, 5 +; GFX908-NEXT: v_writelane_b32 v62, s50, 6 +; GFX908-NEXT: v_writelane_b32 v62, s51, 7 +; GFX908-NEXT: v_writelane_b32 v62, s52, 8 +; GFX908-NEXT: v_writelane_b32 v62, s53, 9 +; GFX908-NEXT: v_writelane_b32 v62, s62, 10 +; GFX908-NEXT: v_writelane_b32 v62, s63, 11 +; GFX908-NEXT: v_writelane_b32 v62, s64, 12 +; GFX908-NEXT: v_writelane_b32 v62, s65, 13 +; GFX908-NEXT: v_writelane_b32 v62, s66, 14 +; GFX908-NEXT: v_writelane_b32 v62, s67, 15 ; GFX908-NEXT: v_mov_b32_e32 v33, v30 ; GFX908-NEXT: v_mov_b32_e32 v34, v29 ; GFX908-NEXT: v_mov_b32_e32 v35, v28 @@ -569,36 +489,20 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX908-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec ; GFX908-NEXT: ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 ; GFX908-NEXT: v_mov_b32_e32 v0, 0 -; GFX908-NEXT: v_readlane_b32 s67, v62, 31 -; GFX908-NEXT: v_readlane_b32 s66, v62, 30 -; GFX908-NEXT: v_readlane_b32 s65, v62, 29 -; GFX908-NEXT: v_readlane_b32 s64, v62, 28 -; GFX908-NEXT: v_readlane_b32 s63, v62, 27 -; GFX908-NEXT: v_readlane_b32 s62, v62, 26 -; GFX908-NEXT: v_readlane_b32 s61, v62, 25 -; GFX908-NEXT: v_readlane_b32 s60, v62, 24 -; GFX908-NEXT: v_readlane_b32 s59, v62, 23 -; GFX908-NEXT: v_readlane_b32 s58, v62, 22 -; GFX908-NEXT: v_readlane_b32 s57, v62, 21 -; GFX908-NEXT: v_readlane_b32 s56, v62, 20 -; GFX908-NEXT: v_readlane_b32 s55, v62, 19 -; GFX908-NEXT: v_readlane_b32 s54, v62, 18 -; GFX908-NEXT: v_readlane_b32 s53, v62, 17 -; GFX908-NEXT: v_readlane_b32 s52, v62, 16 -; GFX908-NEXT: v_readlane_b32 s51, v62, 15 -; GFX908-NEXT: v_readlane_b32 s50, v62, 14 -; GFX908-NEXT: v_readlane_b32 s49, v62, 13 -; GFX908-NEXT: v_readlane_b32 s48, v62, 12 -; GFX908-NEXT: v_readlane_b32 s47, v62, 11 -; GFX908-NEXT: v_readlane_b32 s46, v62, 10 -; GFX908-NEXT: v_readlane_b32 s45, v62, 9 -; GFX908-NEXT: v_readlane_b32 s44, v62, 8 -; GFX908-NEXT: v_readlane_b32 s43, v62, 7 -; GFX908-NEXT: v_readlane_b32 s42, v62, 6 -; GFX908-NEXT: v_readlane_b32 s41, v62, 5 -; GFX908-NEXT: v_readlane_b32 s40, v62, 4 -; GFX908-NEXT: v_readlane_b32 s39, v62, 3 -; GFX908-NEXT: v_readlane_b32 s38, v62, 2 +; GFX908-NEXT: v_readlane_b32 s67, v62, 15 +; GFX908-NEXT: v_readlane_b32 s66, v62, 14 +; GFX908-NEXT: v_readlane_b32 s65, v62, 13 +; GFX908-NEXT: v_readlane_b32 s64, v62, 12 +; GFX908-NEXT: v_readlane_b32 s63, v62, 11 +; GFX908-NEXT: v_readlane_b32 s62, v62, 10 +; GFX908-NEXT: v_readlane_b32 s53, v62, 9 +; GFX908-NEXT: v_readlane_b32 s52, v62, 8 +; GFX908-NEXT: v_readlane_b32 s51, v62, 7 +; GFX908-NEXT: v_readlane_b32 s50, v62, 6 +; GFX908-NEXT: v_readlane_b32 s49, v62, 5 +; GFX908-NEXT: v_readlane_b32 s48, v62, 4 +; GFX908-NEXT: v_readlane_b32 s47, v62, 3 +; GFX908-NEXT: v_readlane_b32 s46, v62, 2 ; GFX908-NEXT: v_readlane_b32 s37, v62, 1 ; GFX908-NEXT: v_readlane_b32 s36, v62, 0 ; GFX908-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse @@ -644,36 +548,20 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX90a-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse ; GFX90a-NEXT: v_writelane_b32 v63, s36, 0 ; GFX90a-NEXT: v_writelane_b32 v63, s37, 1 -; GFX90a-NEXT: v_writelane_b32 v63, s38, 2 -; GFX90a-NEXT: v_writelane_b32 v63, s39, 3 -; GFX90a-NEXT: v_writelane_b32 v63, s40, 4 -; GFX90a-NEXT: v_writelane_b32 v63, s41, 5 -; GFX90a-NEXT: v_writelane_b32 v63, s42, 6 -; GFX90a-NEXT: v_writelane_b32 v63, s43, 7 -; GFX90a-NEXT: v_writelane_b32 v63, s44, 8 -; GFX90a-NEXT: v_writelane_b32 v63, s45, 9 -; GFX90a-NEXT: v_writelane_b32 v63, s46, 10 -; GFX90a-NEXT: v_writelane_b32 v63, s47, 11 -; GFX90a-NEXT: v_writelane_b32 v63, s48, 12 -; GFX90a-NEXT: v_writelane_b32 v63, s49, 13 -; GFX90a-NEXT: v_writelane_b32 v63, s50, 14 -; GFX90a-NEXT: v_writelane_b32 v63, s51, 15 -; GFX90a-NEXT: v_writelane_b32 v63, s52, 16 -; GFX90a-NEXT: v_writelane_b32 v63, s53, 17 -; GFX90a-NEXT: v_writelane_b32 v63, s54, 18 -; GFX90a-NEXT: v_writelane_b32 v63, s55, 19 -; GFX90a-NEXT: v_writelane_b32 v63, s56, 20 -; GFX90a-NEXT: v_writelane_b32 v63, s57, 21 -; GFX90a-NEXT: v_writelane_b32 v63, s58, 22 -; GFX90a-NEXT: v_writelane_b32 v63, s59, 23 -; GFX90a-NEXT: v_writelane_b32 v63, s60, 24 -; GFX90a-NEXT: v_writelane_b32 v63, s61, 25 -; GFX90a-NEXT: v_writelane_b32 v63, s62, 26 -; GFX90a-NEXT: v_writelane_b32 v63, s63, 27 -; GFX90a-NEXT: v_writelane_b32 v63, s64, 28 -; GFX90a-NEXT: v_writelane_b32 v63, s65, 29 -; GFX90a-NEXT: v_writelane_b32 v63, s66, 30 -; GFX90a-NEXT: v_writelane_b32 v63, s67, 31 +; GFX90a-NEXT: v_writelane_b32 v63, s46, 2 +; GFX90a-NEXT: v_writelane_b32 v63, s47, 3 +; GFX90a-NEXT: v_writelane_b32 v63, s48, 4 +; GFX90a-NEXT: v_writelane_b32 v63, s49, 5 +; GFX90a-NEXT: v_writelane_b32 v63, s50, 6 +; GFX90a-NEXT: v_writelane_b32 v63, s51, 7 +; GFX90a-NEXT: v_writelane_b32 v63, s52, 8 +; GFX90a-NEXT: v_writelane_b32 v63, s53, 9 +; GFX90a-NEXT: v_writelane_b32 v63, s62, 10 +; GFX90a-NEXT: v_writelane_b32 v63, s63, 11 +; GFX90a-NEXT: v_writelane_b32 v63, s64, 12 +; GFX90a-NEXT: v_writelane_b32 v63, s65, 13 +; GFX90a-NEXT: v_writelane_b32 v63, s66, 14 +; GFX90a-NEXT: v_writelane_b32 v63, s67, 15 ; GFX90a-NEXT: v_mov_b32_e32 v33, v30 ; GFX90a-NEXT: v_mov_b32_e32 v34, v29 ; GFX90a-NEXT: v_mov_b32_e32 v35, v28 @@ -771,36 +659,20 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX90a-NEXT: ; kill: def $vgpr31 killed $vgpr32 killed $exec ; GFX90a-NEXT: ; implicit-def: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 ; GFX90a-NEXT: v_mov_b32_e32 v0, 0 -; GFX90a-NEXT: v_readlane_b32 s67, v63, 31 -; GFX90a-NEXT: v_readlane_b32 s66, v63, 30 -; GFX90a-NEXT: v_readlane_b32 s65, v63, 29 -; GFX90a-NEXT: v_readlane_b32 s64, v63, 28 -; GFX90a-NEXT: v_readlane_b32 s63, v63, 27 -; GFX90a-NEXT: v_readlane_b32 s62, v63, 26 -; GFX90a-NEXT: v_readlane_b32 s61, v63, 25 -; GFX90a-NEXT: v_readlane_b32 s60, v63, 24 -; GFX90a-NEXT: v_readlane_b32 s59, v63, 23 -; GFX90a-NEXT: v_readlane_b32 s58, v63, 22 -; GFX90a-NEXT: v_readlane_b32 s57, v63, 21 -; GFX90a-NEXT: v_readlane_b32 s56, v63, 20 -; GFX90a-NEXT: v_readlane_b32 s55, v63, 19 -; GFX90a-NEXT: v_readlane_b32 s54, v63, 18 -; GFX90a-NEXT: v_readlane_b32 s53, v63, 17 -; GFX90a-NEXT: v_readlane_b32 s52, v63, 16 -; GFX90a-NEXT: v_readlane_b32 s51, v63, 15 -; GFX90a-NEXT: v_readlane_b32 s50, v63, 14 -; GFX90a-NEXT: v_readlane_b32 s49, v63, 13 -; GFX90a-NEXT: v_readlane_b32 s48, v63, 12 -; GFX90a-NEXT: v_readlane_b32 s47, v63, 11 -; GFX90a-NEXT: v_readlane_b32 s46, v63, 10 -; GFX90a-NEXT: v_readlane_b32 s45, v63, 9 -; GFX90a-NEXT: v_readlane_b32 s44, v63, 8 -; GFX90a-NEXT: v_readlane_b32 s43, v63, 7 -; GFX90a-NEXT: v_readlane_b32 s42, v63, 6 -; GFX90a-NEXT: v_readlane_b32 s41, v63, 5 -; GFX90a-NEXT: v_readlane_b32 s40, v63, 4 -; GFX90a-NEXT: v_readlane_b32 s39, v63, 3 -; GFX90a-NEXT: v_readlane_b32 s38, v63, 2 +; GFX90a-NEXT: v_readlane_b32 s67, v63, 15 +; GFX90a-NEXT: v_readlane_b32 s66, v63, 14 +; GFX90a-NEXT: v_readlane_b32 s65, v63, 13 +; GFX90a-NEXT: v_readlane_b32 s64, v63, 12 +; GFX90a-NEXT: v_readlane_b32 s63, v63, 11 +; GFX90a-NEXT: v_readlane_b32 s62, v63, 10 +; GFX90a-NEXT: v_readlane_b32 s53, v63, 9 +; GFX90a-NEXT: v_readlane_b32 s52, v63, 8 +; GFX90a-NEXT: v_readlane_b32 s51, v63, 7 +; GFX90a-NEXT: v_readlane_b32 s50, v63, 6 +; GFX90a-NEXT: v_readlane_b32 s49, v63, 5 +; GFX90a-NEXT: v_readlane_b32 s48, v63, 4 +; GFX90a-NEXT: v_readlane_b32 s47, v63, 3 +; GFX90a-NEXT: v_readlane_b32 s46, v63, 2 ; GFX90a-NEXT: v_readlane_b32 s37, v63, 1 ; GFX90a-NEXT: v_readlane_b32 s36, v63, 0 ; GFX90a-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse diff --git a/llvm/test/CodeGen/MIR/AMDGPU/spill-phys-vgprs.mir b/llvm/test/CodeGen/MIR/AMDGPU/spill-phys-vgprs.mir index 4d6e33cf0b68a..b427c5bdd7229 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/spill-phys-vgprs.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/spill-phys-vgprs.mir @@ -1,8 +1,7 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa --start-before=si-lower-sgpr-spills --stop-after=prologepilog -o - %s | FileCheck %s # CHECK: csr_sgpr_spill -# CHECK: spillPhysVGPRs -# CHECK-NEXT: - '$vgpr0' +# CHECK-NOT: spillPhysVGPRs --- name: csr_sgpr_spill tracksRegLiveness: true From 627fe1be0d012bbea8c85a4d899163e52e25970b Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Tue, 18 Feb 2025 12:46:55 -0500 Subject: [PATCH 2/3] Start the partition from s40 --- llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td | 14 +- .../amdgpu-simplify-libcall-pow-codegen.ll | 60 +- llvm/test/CodeGen/AMDGPU/bf16.ll | 168 +- ...der-no-live-segment-at-def-implicit-def.ll | 42 +- .../branch-folding-implicit-def-subreg.ll | 191 +- .../test/CodeGen/AMDGPU/branch-relax-spill.ll | 36 +- llvm/test/CodeGen/AMDGPU/call-args-inreg.ll | 12 +- .../CodeGen/AMDGPU/call-argument-types.ll | 2520 ++++++++--------- .../test/CodeGen/AMDGPU/callee-frame-setup.ll | 1426 +++++----- .../AMDGPU/csr-sgpr-spill-live-ins.mir | 6 - llvm/test/CodeGen/AMDGPU/ds_read2.ll | 36 +- .../AMDGPU/dwarf-multi-register-use-crash.ll | 26 +- .../eliminate-frame-index-s-mov-b32.mir | 44 +- .../CodeGen/AMDGPU/function-args-inreg.ll | 4 +- .../CodeGen/AMDGPU/function-resource-usage.ll | 12 +- .../CodeGen/AMDGPU/gfx-call-non-gfx-func.ll | 128 +- .../AMDGPU/gfx-callable-argument-types.ll | 64 +- .../AMDGPU/global_atomics_scan_fadd.ll | 1828 ++++++------ .../AMDGPU/global_atomics_scan_fmax.ll | 1276 ++++----- .../AMDGPU/global_atomics_scan_fmin.ll | 1276 ++++----- .../AMDGPU/global_atomics_scan_fsub.ll | 1828 ++++++------ .../greedy-alloc-fail-sgpr1024-spill.mir | 76 +- .../identical-subrange-spill-infloop.ll | 48 +- llvm/test/CodeGen/AMDGPU/indirect-call.ll | 320 +-- llvm/test/CodeGen/AMDGPU/issue48473.mir | 2 +- .../llvm.amdgcn.pops.exiting.wave.id.ll | 48 +- llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll | 12 +- llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll | 36 +- llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll | 12 +- llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll | 36 +- .../lower-work-group-id-intrinsics-hsa.ll | 64 +- ...ne-sink-temporal-divergence-swdev407790.ll | 144 +- .../AMDGPU/materialize-frame-index-sgpr.ll | 342 ++- ...-knownbits-assign-crash-gh-issue-110930.ll | 10 +- .../AMDGPU/pei-scavenge-sgpr-carry-out.mir | 40 +- .../CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir | 28 +- .../test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir | 14 +- .../AMDGPU/promote-constOffset-to-imm.ll | 446 +-- .../ran-out-of-sgprs-allocation-failure.mir | 119 +- llvm/test/CodeGen/AMDGPU/select.f16.ll | 4 +- .../sgpr-spill-update-only-slot-indexes.ll | 16 +- .../AMDGPU/shufflevector.v2i64.v8i64.ll | 448 +-- llvm/test/CodeGen/AMDGPU/sibling-call.ll | 72 +- .../AMDGPU/snippet-copy-bundle-regression.mir | 58 +- .../AMDGPU/spill-sgpr-to-virtual-vgpr.mir | 10 +- .../AMDGPU/spill-sgpr-used-for-exec-copy.mir | 4 +- .../spill_more_than_wavesize_csr_sgprs.ll | 64 +- ...tack-pointer-offset-relative-frameindex.ll | 22 +- llvm/test/CodeGen/AMDGPU/stack-realign.ll | 418 +-- .../AMDGPU/tuple-allocation-failure.ll | 144 +- .../unallocatable-bundle-regression.mir | 48 +- .../AMDGPU/unstructured-cfg-def-use-issue.ll | 70 +- .../CodeGen/AMDGPU/use_restore_frame_reg.mir | 44 +- .../AMDGPU/vgpr-large-tuple-alloc-error.ll | 64 +- 54 files changed, 7102 insertions(+), 7178 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index e3861a7d06c3d..e891fdba4e03e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -91,11 +91,15 @@ def CSR_AMDGPU_AGPRs : CalleeSavedRegs< >; def CSR_AMDGPU_SGPRs : CalleeSavedRegs< - (add (sequence "SGPR%u", 30, 37), - (sequence "SGPR%u", 46, 53), - (sequence "SGPR%u", 62, 69), - (sequence "SGPR%u", 78, 85), - (sequence "SGPR%u", 94, 105)) + // Ensure that s30-s31 (return address), s32 (stack pointer), s33 (frame pointer), + // and s34 (base pointer) are callee-saved. The striped layout starts from s40, + // with a stripe width of 8. The last stripe is 10 wide instead of 8, to avoid + // ending with a 2-wide stripe. + (add (sequence "SGPR%u", 30, 39), + (sequence "SGPR%u", 48, 55), + (sequence "SGPR%u", 64, 71), + (sequence "SGPR%u", 80, 87), + (sequence "SGPR%u", 96, 105)) >; def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs< diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll index 440e509ec947f..5bda853b76727 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll @@ -124,8 +124,8 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: v_writelane_b32 v43, s35, 3 ; CHECK-NEXT: v_writelane_b32 v43, s36, 4 ; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s46, 6 -; CHECK-NEXT: v_writelane_b32 v43, s47, 7 +; CHECK-NEXT: v_writelane_b32 v43, s38, 6 +; CHECK-NEXT: v_writelane_b32 v43, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 ; CHECK-NEXT: v_writelane_b32 v43, s48, 8 ; CHECK-NEXT: v_writelane_b32 v43, s49, 9 @@ -152,7 +152,7 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] +; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41 @@ -162,7 +162,7 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] ; CHECK-NEXT: s_mov_b32 s12, s53 @@ -184,8 +184,8 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: v_readlane_b32 s50, v43, 10 ; CHECK-NEXT: v_readlane_b32 s49, v43, 9 ; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s47, v43, 7 -; CHECK-NEXT: v_readlane_b32 s46, v43, 6 +; CHECK-NEXT: v_readlane_b32 s39, v43, 7 +; CHECK-NEXT: v_readlane_b32 s38, v43, 6 ; CHECK-NEXT: v_readlane_b32 s37, v43, 5 ; CHECK-NEXT: v_readlane_b32 s36, v43, 4 ; CHECK-NEXT: v_readlane_b32 s35, v43, 3 @@ -264,8 +264,8 @@ define double @test_powr_fast_f64(double %x, double %y) { ; CHECK-NEXT: v_writelane_b32 v43, s35, 3 ; CHECK-NEXT: v_writelane_b32 v43, s36, 4 ; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s46, 6 -; CHECK-NEXT: v_writelane_b32 v43, s47, 7 +; CHECK-NEXT: v_writelane_b32 v43, s38, 6 +; CHECK-NEXT: v_writelane_b32 v43, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 ; CHECK-NEXT: v_writelane_b32 v43, s48, 8 ; CHECK-NEXT: v_writelane_b32 v43, s49, 9 @@ -291,7 +291,7 @@ define double @test_powr_fast_f64(double %x, double %y) { ; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] +; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mul_f64 v[0:1], v[40:41], v[0:1] @@ -300,7 +300,7 @@ define double @test_powr_fast_f64(double %x, double %y) { ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] ; CHECK-NEXT: s_mov_b32 s12, s53 @@ -319,8 +319,8 @@ define double @test_powr_fast_f64(double %x, double %y) { ; CHECK-NEXT: v_readlane_b32 s50, v43, 10 ; CHECK-NEXT: v_readlane_b32 s49, v43, 9 ; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s47, v43, 7 -; CHECK-NEXT: v_readlane_b32 s46, v43, 6 +; CHECK-NEXT: v_readlane_b32 s39, v43, 7 +; CHECK-NEXT: v_readlane_b32 s38, v43, 6 ; CHECK-NEXT: v_readlane_b32 s37, v43, 5 ; CHECK-NEXT: v_readlane_b32 s36, v43, 4 ; CHECK-NEXT: v_readlane_b32 s35, v43, 3 @@ -406,8 +406,8 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: v_writelane_b32 v43, s35, 3 ; CHECK-NEXT: v_writelane_b32 v43, s36, 4 ; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s46, 6 -; CHECK-NEXT: v_writelane_b32 v43, s47, 7 +; CHECK-NEXT: v_writelane_b32 v43, s38, 6 +; CHECK-NEXT: v_writelane_b32 v43, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 ; CHECK-NEXT: v_writelane_b32 v43, s48, 8 ; CHECK-NEXT: v_writelane_b32 v43, s49, 9 @@ -434,7 +434,7 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] +; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41 @@ -444,7 +444,7 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] ; CHECK-NEXT: s_mov_b32 s12, s53 @@ -466,8 +466,8 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: v_readlane_b32 s50, v43, 10 ; CHECK-NEXT: v_readlane_b32 s49, v43, 9 ; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s47, v43, 7 -; CHECK-NEXT: v_readlane_b32 s46, v43, 6 +; CHECK-NEXT: v_readlane_b32 s39, v43, 7 +; CHECK-NEXT: v_readlane_b32 s38, v43, 6 ; CHECK-NEXT: v_readlane_b32 s37, v43, 5 ; CHECK-NEXT: v_readlane_b32 s36, v43, 4 ; CHECK-NEXT: v_readlane_b32 s35, v43, 3 @@ -548,8 +548,8 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { ; CHECK-NEXT: v_writelane_b32 v42, s35, 3 ; CHECK-NEXT: v_writelane_b32 v42, s36, 4 ; CHECK-NEXT: v_writelane_b32 v42, s37, 5 -; CHECK-NEXT: v_writelane_b32 v42, s46, 6 -; CHECK-NEXT: v_writelane_b32 v42, s47, 7 +; CHECK-NEXT: v_writelane_b32 v42, s38, 6 +; CHECK-NEXT: v_writelane_b32 v42, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v42, s48, 8 ; CHECK-NEXT: v_writelane_b32 v42, s49, 9 @@ -573,7 +573,7 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { ; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] +; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] ; CHECK-NEXT: v_lshlrev_b32_e32 v41, 1, v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -584,7 +584,7 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] ; CHECK-NEXT: s_mov_b32 s12, s53 @@ -602,8 +602,8 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { ; CHECK-NEXT: v_readlane_b32 s50, v42, 10 ; CHECK-NEXT: v_readlane_b32 s49, v42, 9 ; CHECK-NEXT: v_readlane_b32 s48, v42, 8 -; CHECK-NEXT: v_readlane_b32 s47, v42, 7 -; CHECK-NEXT: v_readlane_b32 s46, v42, 6 +; CHECK-NEXT: v_readlane_b32 s39, v42, 7 +; CHECK-NEXT: v_readlane_b32 s38, v42, 6 ; CHECK-NEXT: v_readlane_b32 s37, v42, 5 ; CHECK-NEXT: v_readlane_b32 s36, v42, 4 ; CHECK-NEXT: v_readlane_b32 s35, v42, 3 @@ -689,8 +689,8 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ; CHECK-NEXT: v_writelane_b32 v43, s35, 3 ; CHECK-NEXT: v_writelane_b32 v43, s36, 4 ; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s46, 6 -; CHECK-NEXT: v_writelane_b32 v43, s47, 7 +; CHECK-NEXT: v_writelane_b32 v43, s38, 6 +; CHECK-NEXT: v_writelane_b32 v43, s39, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 ; CHECK-NEXT: v_writelane_b32 v43, s48, 8 ; CHECK-NEXT: v_writelane_b32 v43, s49, 9 @@ -716,7 +716,7 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] +; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] ; CHECK-NEXT: v_or_b32_e32 v42, 1, v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -727,7 +727,7 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] ; CHECK-NEXT: s_mov_b32 s12, s53 @@ -748,8 +748,8 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ; CHECK-NEXT: v_readlane_b32 s50, v43, 10 ; CHECK-NEXT: v_readlane_b32 s49, v43, 9 ; CHECK-NEXT: v_readlane_b32 s48, v43, 8 -; CHECK-NEXT: v_readlane_b32 s47, v43, 7 -; CHECK-NEXT: v_readlane_b32 s46, v43, 6 +; CHECK-NEXT: v_readlane_b32 s39, v43, 7 +; CHECK-NEXT: v_readlane_b32 s38, v43, 6 ; CHECK-NEXT: v_readlane_b32 s37, v43, 5 ; CHECK-NEXT: v_readlane_b32 s36, v43, 4 ; CHECK-NEXT: v_readlane_b32 s35, v43, 3 diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 10d6d09441ee5..91598496eb984 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -40708,21 +40708,21 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x ; GCN-NEXT: v_and_b32_e32 v8, 1, v14 ; GCN-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v7 ; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 -; GCN-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v8 +; GCN-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v8 ; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:64 ; GCN-NEXT: v_and_b32_e32 v9, 1, v15 -; GCN-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v9 +; GCN-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v9 ; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:60 ; GCN-NEXT: s_waitcnt vmcnt(2) ; GCN-NEXT: v_mul_f32_e32 v7, 1.0, v7 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v8 -; GCN-NEXT: v_cndmask_b32_e64 v15, v8, v7, s[40:41] +; GCN-NEXT: v_cndmask_b32_e64 v15, v8, v7, s[42:43] ; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:56 ; GCN-NEXT: v_mul_f32_e32 v8, 1.0, v30 ; GCN-NEXT: s_waitcnt vmcnt(1) ; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v9 -; GCN-NEXT: v_cndmask_b32_e64 v14, v9, v8, s[38:39] +; GCN-NEXT: v_cndmask_b32_e64 v14, v9, v8, s[40:41] ; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:52 ; GCN-NEXT: v_mul_f32_e32 v9, 1.0, v29 ; GCN-NEXT: s_waitcnt vmcnt(1) @@ -40967,9 +40967,9 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x ; GFX8-NEXT: v_and_b32_e32 v0, 1, v13 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v14 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v15 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v15 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v22 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v30 ; GFX8-NEXT: v_cndmask_b32_e64 v6, v1, v0, s[28:29] @@ -40995,9 +40995,9 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x ; GFX8-NEXT: v_or_b32_sdwa v4, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v5, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cndmask_b32_e64 v10, v0, v23, s[38:39] +; GFX8-NEXT: v_cndmask_b32_e64 v10, v0, v23, s[40:41] ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v11, v0, v1, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v11, v0, v1, s[42:43] ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v19 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v27 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v1, v0, s[16:17] @@ -42028,49 +42028,51 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: v_and_b32_e32 v0, 1, v13 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v14 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v15 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v16 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v15 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v17 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v16 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v17 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[46:47], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v18 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[54:55], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v19 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v20 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v19 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v21 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v20 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v21 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[62:63], 1, v0 ; GFX8-NEXT: v_and_b32_e32 v0, 1, v22 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[70:71], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v23 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[72:73], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v24 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v23 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[74:75], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v25 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v24 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[76:77], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v26 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[86:87], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v27 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v25 ; GFX8-NEXT: v_writelane_b32 v34, s30, 0 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v28 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[78:79], 1, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v26 ; GFX8-NEXT: v_writelane_b32 v34, s31, 1 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v29 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v27 ; GFX8-NEXT: v_writelane_b32 v34, s34, 2 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v30 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v28 ; GFX8-NEXT: v_writelane_b32 v34, s35, 3 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0 -; GFX8-NEXT: buffer_load_ushort v0, off, s[0:3], s32 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[30:31], 1, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v29 ; GFX8-NEXT: v_writelane_b32 v34, s36, 4 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[34:35], 1, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 1, v30 ; GFX8-NEXT: v_writelane_b32 v34, s37, 5 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0 +; GFX8-NEXT: buffer_load_ushort v0, off, s[0:3], s32 +; GFX8-NEXT: v_writelane_b32 v34, s38, 6 +; GFX8-NEXT: v_writelane_b32 v34, s39, 7 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[36:37], 1, v0 +; GFX8-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0 ; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:68 ; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; GFX8-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:72 @@ -42107,40 +42109,40 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v29 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_lshrrev_b32_e32 v28, 16, v32 -; GFX8-NEXT: v_cndmask_b32_e64 v28, v33, v28, s[36:37] -; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[34:35] +; GFX8-NEXT: v_cndmask_b32_e64 v28, v33, v28, s[38:39] +; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[36:37] ; GFX8-NEXT: v_lshrrev_b32_e32 v32, 16, v31 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v30 -; GFX8-NEXT: v_cndmask_b32_e64 v32, v33, v32, s[30:31] -; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, v31, s[90:91] +; GFX8-NEXT: v_cndmask_b32_e64 v32, v33, v32, s[34:35] +; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, v31, s[30:31] ; GFX8-NEXT: v_lshrrev_b32_e32 v31, 16, v27 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v26 -; GFX8-NEXT: v_cndmask_b32_e64 v31, v33, v31, s[88:89] -; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[86:87] +; GFX8-NEXT: v_cndmask_b32_e64 v31, v33, v31, s[90:91] +; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[88:89] ; GFX8-NEXT: v_lshrrev_b32_e32 v27, 16, v25 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v24 -; GFX8-NEXT: v_cndmask_b32_e64 v27, v33, v27, s[76:77] -; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[74:75] +; GFX8-NEXT: v_cndmask_b32_e64 v27, v33, v27, s[78:79] +; GFX8-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[76:77] ; GFX8-NEXT: v_lshrrev_b32_e32 v25, 16, v23 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v22 -; GFX8-NEXT: v_cndmask_b32_e64 v25, v33, v25, s[72:73] -; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[70:71] +; GFX8-NEXT: v_cndmask_b32_e64 v25, v33, v25, s[74:75] +; GFX8-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[72:73] ; GFX8-NEXT: v_lshrrev_b32_e32 v23, 16, v21 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v20 -; GFX8-NEXT: v_cndmask_b32_e64 v23, v33, v23, s[60:61] -; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[58:59] +; GFX8-NEXT: v_cndmask_b32_e64 v23, v33, v23, s[62:63] +; GFX8-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[60:61] ; GFX8-NEXT: v_lshrrev_b32_e32 v21, 16, v19 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v18 -; GFX8-NEXT: v_cndmask_b32_e64 v21, v33, v21, s[56:57] -; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[54:55] +; GFX8-NEXT: v_cndmask_b32_e64 v21, v33, v21, s[58:59] +; GFX8-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[56:57] ; GFX8-NEXT: v_lshrrev_b32_e32 v19, 16, v17 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v16 -; GFX8-NEXT: v_cndmask_b32_e64 v19, v33, v19, s[44:45] -; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[42:43] +; GFX8-NEXT: v_cndmask_b32_e64 v19, v33, v19, s[46:47] +; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[44:45] ; GFX8-NEXT: v_lshrrev_b32_e32 v17, 16, v15 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v14 -; GFX8-NEXT: v_cndmask_b32_e64 v17, v33, v17, s[40:41] -; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[38:39] +; GFX8-NEXT: v_cndmask_b32_e64 v17, v33, v17, s[42:43] +; GFX8-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[40:41] ; GFX8-NEXT: v_lshrrev_b32_e32 v15, 16, v13 ; GFX8-NEXT: v_lshrrev_b32_e32 v33, 16, v12 ; GFX8-NEXT: v_cndmask_b32_e64 v15, v33, v15, s[28:29] @@ -42201,6 +42203,8 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX8-NEXT: v_or_b32_sdwa v13, v26, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v14, v30, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v15, v29, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: v_readlane_b32 s39, v34, 7 +; GFX8-NEXT: v_readlane_b32 s38, v34, 6 ; GFX8-NEXT: v_readlane_b32 s37, v34, 5 ; GFX8-NEXT: v_readlane_b32 s36, v34, 4 ; GFX8-NEXT: v_readlane_b32 s35, v34, 3 @@ -42246,37 +42250,37 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX9-NEXT: v_and_b32_e32 v0, 1, v12 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[28:29], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v15 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[38:39], 1, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v14 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[40:41], 1, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v17 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v14 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[42:43], 1, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v16 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v17 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[44:45], 1, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v16 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[46:47], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v19 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[54:55], 1, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v18 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[56:57], 1, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v21 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v18 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[58:59], 1, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v20 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v21 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[60:61], 1, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v20 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[62:63], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v23 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[70:71], 1, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v22 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[72:73], 1, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v25 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v22 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[74:75], 1, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v24 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v25 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[76:77], 1, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v24 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[78:79], 1, v0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v27 -; GFX9-NEXT: v_cmp_eq_u32_e64 s[86:87], 1, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v26 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[88:89], 1, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v29 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v26 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[90:91], 1, v0 -; GFX9-NEXT: v_and_b32_e32 v0, 1, v28 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v29 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[92:93], 1, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 1, v28 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[94:95], 1, v0 ; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], s32 ; GFX9-NEXT: v_writelane_b32 v33, s30, 0 ; GFX9-NEXT: v_writelane_b32 v33, s31, 1 @@ -42326,38 +42330,38 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX9-NEXT: v_lshrrev_b32_e32 v32, 16, v32 ; GFX9-NEXT: v_lshrrev_b32_e32 v31, 16, v31 ; GFX9-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[30:31] -; GFX9-NEXT: v_cndmask_b32_e64 v32, v28, v30, s[92:93] +; GFX9-NEXT: v_cndmask_b32_e64 v32, v28, v30, s[94:95] ; GFX9-NEXT: v_lshrrev_b32_e32 v30, 16, v30 ; GFX9-NEXT: v_lshrrev_b32_e32 v28, 16, v28 -; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, v30, s[90:91] -; GFX9-NEXT: v_cndmask_b32_e64 v30, v26, v27, s[88:89] +; GFX9-NEXT: v_cndmask_b32_e64 v28, v28, v30, s[92:93] +; GFX9-NEXT: v_cndmask_b32_e64 v30, v26, v27, s[90:91] ; GFX9-NEXT: v_lshrrev_b32_e32 v27, 16, v27 ; GFX9-NEXT: v_lshrrev_b32_e32 v26, 16, v26 -; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[86:87] -; GFX9-NEXT: v_cndmask_b32_e64 v27, v24, v25, s[76:77] +; GFX9-NEXT: v_cndmask_b32_e64 v26, v26, v27, s[88:89] +; GFX9-NEXT: v_cndmask_b32_e64 v27, v24, v25, s[78:79] ; GFX9-NEXT: v_lshrrev_b32_e32 v25, 16, v25 ; GFX9-NEXT: v_lshrrev_b32_e32 v24, 16, v24 -; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[74:75] -; GFX9-NEXT: v_cndmask_b32_e64 v25, v22, v23, s[72:73] +; GFX9-NEXT: v_cndmask_b32_e64 v24, v24, v25, s[76:77] +; GFX9-NEXT: v_cndmask_b32_e64 v25, v22, v23, s[74:75] ; GFX9-NEXT: v_lshrrev_b32_e32 v23, 16, v23 ; GFX9-NEXT: v_lshrrev_b32_e32 v22, 16, v22 -; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[70:71] -; GFX9-NEXT: v_cndmask_b32_e64 v23, v20, v21, s[60:61] +; GFX9-NEXT: v_cndmask_b32_e64 v22, v22, v23, s[72:73] +; GFX9-NEXT: v_cndmask_b32_e64 v23, v20, v21, s[62:63] ; GFX9-NEXT: v_lshrrev_b32_e32 v21, 16, v21 ; GFX9-NEXT: v_lshrrev_b32_e32 v20, 16, v20 -; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[58:59] -; GFX9-NEXT: v_cndmask_b32_e64 v21, v18, v19, s[56:57] +; GFX9-NEXT: v_cndmask_b32_e64 v20, v20, v21, s[60:61] +; GFX9-NEXT: v_cndmask_b32_e64 v21, v18, v19, s[58:59] ; GFX9-NEXT: v_lshrrev_b32_e32 v19, 16, v19 ; GFX9-NEXT: v_lshrrev_b32_e32 v18, 16, v18 -; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[54:55] -; GFX9-NEXT: v_cndmask_b32_e64 v19, v16, v17, s[44:45] +; GFX9-NEXT: v_cndmask_b32_e64 v18, v18, v19, s[56:57] +; GFX9-NEXT: v_cndmask_b32_e64 v19, v16, v17, s[46:47] ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v17 ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 16, v16 -; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[42:43] -; GFX9-NEXT: v_cndmask_b32_e64 v17, v14, v15, s[40:41] +; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v17, s[44:45] +; GFX9-NEXT: v_cndmask_b32_e64 v17, v14, v15, s[42:43] ; GFX9-NEXT: v_lshrrev_b32_e32 v15, 16, v15 ; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v14 -; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[38:39] +; GFX9-NEXT: v_cndmask_b32_e64 v14, v14, v15, s[40:41] ; GFX9-NEXT: v_cndmask_b32_e64 v15, v12, v13, s[28:29] ; GFX9-NEXT: v_lshrrev_b32_e32 v13, 16, v13 ; GFX9-NEXT: v_lshrrev_b32_e32 v12, 16, v12 diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll index 85b9adfe6ea5c..36fa7b97b3c77 100644 --- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll +++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll @@ -9,24 +9,24 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: s_addc_u32 s13, s13, 0 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 -; CHECK-NEXT: s_load_dwordx8 s[96:103], s[8:9], 0x0 +; CHECK-NEXT: s_load_dwordx8 s[48:55], s[8:9], 0x0 ; CHECK-NEXT: s_add_u32 s0, s0, s17 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 ; CHECK-NEXT: s_mov_b32 s12, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_cmp_lg_u32 s100, 0 +; CHECK-NEXT: s_cmp_lg_u32 s52, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB0_8 ; CHECK-NEXT: ; %bb.1: ; %if.end13.i.i -; CHECK-NEXT: s_cmp_eq_u32 s102, 0 +; CHECK-NEXT: s_cmp_eq_u32 s54, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB0_4 ; CHECK-NEXT: ; %bb.2: ; %if.else251.i.i -; CHECK-NEXT: s_cmp_lg_u32 s103, 0 +; CHECK-NEXT: s_cmp_lg_u32 s55, 0 ; CHECK-NEXT: s_mov_b32 s17, 0 ; CHECK-NEXT: s_cselect_b32 s12, -1, 0 ; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12 ; CHECK-NEXT: s_cbranch_vccz .LBB0_5 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: s_mov_b32 s96, 0 +; CHECK-NEXT: s_mov_b32 s48, 0 ; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12 ; CHECK-NEXT: s_cbranch_vccz .LBB0_6 ; CHECK-NEXT: s_branch .LBB0_7 @@ -34,16 +34,16 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: s_mov_b32 s14, s12 ; CHECK-NEXT: s_mov_b32 s15, s12 ; CHECK-NEXT: s_mov_b32 s13, s12 -; CHECK-NEXT: s_mov_b64 s[98:99], s[14:15] -; CHECK-NEXT: s_mov_b64 s[96:97], s[12:13] +; CHECK-NEXT: s_mov_b64 s[50:51], s[14:15] +; CHECK-NEXT: s_mov_b64 s[48:49], s[12:13] ; CHECK-NEXT: s_branch .LBB0_7 ; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i -; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s101, 0 -; CHECK-NEXT: s_mov_b32 s96, 1.0 +; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s53, 0 +; CHECK-NEXT: s_mov_b32 s48, 1.0 ; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000 -; CHECK-NEXT: s_mov_b32 s97, s96 -; CHECK-NEXT: s_mov_b32 s98, s96 -; CHECK-NEXT: s_mov_b32 s99, s96 +; CHECK-NEXT: s_mov_b32 s49, s48 +; CHECK-NEXT: s_mov_b32 s50, s48 +; CHECK-NEXT: s_mov_b32 s51, s48 ; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12 ; CHECK-NEXT: s_cbranch_vccnz .LBB0_7 ; CHECK-NEXT: .LBB0_6: ; %if.end273.i.i @@ -55,7 +55,7 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0 ; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1 -; CHECK-NEXT: v_add_f32_e64 v1, s17, s96 +; CHECK-NEXT: v_add_f32_e64 v1, s17, s48 ; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9] ; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13] ; CHECK-NEXT: s_mov_b32 s12, s14 @@ -65,13 +65,13 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: s_mov_b32 s13, s15 ; CHECK-NEXT: s_mov_b32 s14, s16 -; CHECK-NEXT: s_mov_b32 s96, 0 +; CHECK-NEXT: s_mov_b32 s48, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] ; CHECK-NEXT: s_mov_b64 s[8:9], s[34:35] -; CHECK-NEXT: s_mov_b32 s97, s96 -; CHECK-NEXT: s_mov_b32 s98, s96 -; CHECK-NEXT: s_mov_b32 s99, s96 +; CHECK-NEXT: s_mov_b32 s49, s48 +; CHECK-NEXT: s_mov_b32 s50, s48 +; CHECK-NEXT: s_mov_b32 s51, s48 ; CHECK-NEXT: .LBB0_7: ; %if.end294.i.i ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:12 @@ -80,11 +80,11 @@ define amdgpu_kernel void @blender_no_live_segment_at_def_error(<4 x float> %ext ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit ; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20 -; CHECK-NEXT: v_mov_b32_e32 v0, s96 +; CHECK-NEXT: v_mov_b32_e32 v0, s48 ; CHECK-NEXT: v_mov_b32_e32 v4, 0 -; CHECK-NEXT: v_mov_b32_e32 v1, s97 -; CHECK-NEXT: v_mov_b32_e32 v2, s98 -; CHECK-NEXT: v_mov_b32_e32 v3, s99 +; CHECK-NEXT: v_mov_b32_e32 v1, s49 +; CHECK-NEXT: v_mov_b32_e32 v2, s50 +; CHECK-NEXT: v_mov_b32_e32 v3, s51 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5] ; CHECK-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index bce02a4cfacde..e43a021802644 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -79,16 +79,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1 + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF @@ -111,7 +111,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.6.Flow20: ; GFX90A-NEXT: successors: %bb.7(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr19 = COPY renamable $sgpr17, implicit $exec ; GFX90A-NEXT: renamable $vgpr18 = COPY $sgpr17, implicit $exec @@ -124,7 +124,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.7.Flow19: ; GFX90A-NEXT: successors: %bb.62(0x40000000), %bb.8(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0 ; GFX90A-NEXT: $sgpr24_sgpr25 = S_AND_SAVEEXEC_B64 $sgpr36_sgpr37, implicit-def $exec, implicit-def $scc, implicit $exec @@ -132,7 +132,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.8.Flow32: ; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr18_sgpr19, implicit-def $exec, implicit-def $scc, implicit $exec @@ -141,7 +141,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.9.bb89: ; GFX90A-NEXT: successors: %bb.10(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) @@ -149,16 +149,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.10.Flow33: ; GFX90A-NEXT: successors: %bb.11(0x40000000), %bb.12(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc - ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.11.bb84: ; GFX90A-NEXT: successors: %bb.12(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) @@ -166,16 +166,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.12.Flow34: ; GFX90A-NEXT: successors: %bb.13(0x40000000), %bb.14(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc - ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr42_sgpr43, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr12_sgpr13 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.14, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.13.bb79: ; GFX90A-NEXT: successors: %bb.14(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) @@ -183,16 +183,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.14.Flow35: ; GFX90A-NEXT: successors: %bb.15(0x40000000), %bb.16(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc - ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr38_sgpr39, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $sgpr40_sgpr41, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_XOR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.16, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.15.bb72: ; GFX90A-NEXT: successors: %bb.16(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr0_vgpr1:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr8 = S_ADD_U32 renamable $sgpr8, 48, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr9 = S_ADDC_U32 killed renamable $sgpr9, 0, implicit-def dead $scc, implicit killed $scc @@ -206,7 +206,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.16.Flow36: ; GFX90A-NEXT: successors: %bb.17(0x40000000), %bb.18(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr36_sgpr37, implicit-def $scc ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr66_sgpr67, implicit-def $exec, implicit-def $scc, implicit $exec @@ -215,7 +215,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.17.bb67: ; GFX90A-NEXT: successors: %bb.18(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) @@ -223,7 +223,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.18.Flow37: ; GFX90A-NEXT: successors: %bb.19(0x40000000), %bb.20(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr64_sgpr65, implicit-def $exec, implicit-def $scc, implicit $exec @@ -232,7 +232,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.19.bb62: ; GFX90A-NEXT: successors: %bb.20(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) @@ -240,16 +240,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.20.Flow38: ; GFX90A-NEXT: successors: %bb.21(0x40000000), %bb.22(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc - ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr62_sgpr63, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr54_sgpr55, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECZ %bb.22, implicit $exec ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.21.bb54: ; GFX90A-NEXT: successors: %bb.22(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) @@ -257,7 +257,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.22.Flow39: ; GFX90A-NEXT: successors: %bb.23(0x40000000), %bb.24(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr52_sgpr53, implicit-def $exec, implicit-def $scc, implicit $exec @@ -266,7 +266,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.23.bb47: ; GFX90A-NEXT: successors: %bb.24(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) @@ -274,7 +274,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.24.Flow40: ; GFX90A-NEXT: successors: %bb.25(0x40000000), %bb.26(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr50_sgpr51, implicit-def $exec, implicit-def $scc, implicit $exec @@ -283,7 +283,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.25.bb40: ; GFX90A-NEXT: successors: %bb.26(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) @@ -291,7 +291,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.26.Flow41: ; GFX90A-NEXT: successors: %bb.27(0x40000000), %bb.28(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr48_sgpr49, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr48_sgpr49, implicit-def $exec, implicit-def $scc, implicit $exec @@ -300,7 +300,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.27.bb33: ; GFX90A-NEXT: successors: %bb.28(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET renamable $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null` + 4, basealign 8, addrspace 5) ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(5) null`, align 8, addrspace 5) @@ -308,10 +308,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.28.Flow42: ; GFX90A-NEXT: successors: %bb.34(0x40000000), %bb.29(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr46_sgpr47, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr4_sgpr5, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr68_sgpr69, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc - ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr46_sgpr47, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX90A-NEXT: $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 $sgpr38_sgpr39, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: renamable $sgpr4_sgpr5 = S_XOR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.34, implicit $exec ; GFX90A-NEXT: {{ $}} @@ -365,13 +365,13 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec :: (load (s8) from %ir.i21, addrspace 1) ; GFX90A-NEXT: renamable $vgpr42 = V_ADD_CO_U32_e32 1024, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1 + ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 -1 + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr43, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec @@ -400,22 +400,22 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.36.Flow21: ; GFX90A-NEXT: successors: %bb.6(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr24_sgpr25, implicit-def $scc ; GFX90A-NEXT: S_BRANCH %bb.6 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.37.bb27: ; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr42_sgpr43, $sgpr64_sgpr65, $sgpr62_sgpr63, $sgpr52_sgpr53, $sgpr66_sgpr67, $sgpr48_sgpr49, $sgpr50_sgpr51 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr46_sgpr47, $sgpr42_sgpr43, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr64_sgpr65, $sgpr50_sgpr51, $sgpr66_sgpr67 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec :: (load (s8) from %ir.i28, addrspace 1) ; GFX90A-NEXT: renamable $vgpr44 = V_ADD_CO_U32_e32 2048, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 -1 - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = COPY renamable $sgpr36_sgpr37 - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = COPY renamable $sgpr36_sgpr37 ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr45, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF @@ -440,34 +440,34 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.38.Flow22: ; GFX90A-NEXT: successors: %bb.36(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr38_sgpr39, implicit-def $scc - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_XOR_B64 $exec, -1, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr40_sgpr41, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr18_sgpr19, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_ANDN2_B64 killed renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_OR_B64 killed renamable $sgpr36_sgpr37, killed renamable $sgpr44_sgpr45, implicit-def dead $scc ; GFX90A-NEXT: S_BRANCH %bb.36 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.39.bb34: ; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.40(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr46_sgpr47, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr52_sgpr53, $sgpr64_sgpr65, $sgpr66_sgpr67 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec :: (load (s8) from %ir.i35, addrspace 1) ; GFX90A-NEXT: renamable $vgpr56 = V_ADD_CO_U32_e32 3072, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 -1 - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = COPY renamable $sgpr36_sgpr37 - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = COPY renamable $sgpr36_sgpr37 + ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr57, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF @@ -495,23 +495,23 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr40_sgpr41, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_XOR_B64 $exec, -1, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr42_sgpr43, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr18_sgpr19, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr48_sgpr49, implicit-def dead $scc ; GFX90A-NEXT: S_BRANCH %bb.38 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.41.bb41: ; GFX90A-NEXT: successors: %bb.46(0x40000000), %bb.42(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr64_sgpr65, $sgpr60_sgpr61, $sgpr66_sgpr67 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr52_sgpr53, $sgpr50_sgpr51, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc @@ -519,7 +519,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr58_vgpr59, 0, 0, implicit $exec :: (load (s8) from %ir.i42, addrspace 1) ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 -1 - ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = COPY renamable $sgpr36_sgpr37 ; GFX90A-NEXT: renamable $vgpr18, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF @@ -542,22 +542,22 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.42.Flow24: ; GFX90A-NEXT: successors: %bb.40(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr42_sgpr43, implicit-def $scc ; GFX90A-NEXT: renamable $vgpr59 = COPY killed renamable $vgpr18, implicit $exec ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_XOR_B64 $exec, -1, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr18_sgpr19, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr48_sgpr49, implicit-def dead $scc ; GFX90A-NEXT: S_BRANCH %bb.40 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.43.bb55: @@ -596,29 +596,30 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: successors: %bb.47(0x80000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $sgpr58_sgpr59, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_XOR_B64 $exec, -1, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_XOR_B64 $exec, -1, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr70_sgpr71 = S_AND_B64 killed renamable $sgpr46_sgpr47, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr56_sgpr57, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr58_sgpr59, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr54_sgpr55, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr46_sgpr47, implicit-def dead $scc ; GFX90A-NEXT: S_BRANCH %bb.47 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.46.bb48: ; GFX90A-NEXT: successors: %bb.43(0x40000000), %bb.47(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr60_sgpr61, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr66_sgpr67, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr33, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr64_sgpr65, $sgpr50_sgpr51, $sgpr66_sgpr67, $sgpr46_sgpr47, $sgpr54_sgpr55, $sgpr58_sgpr59, $sgpr56_sgpr57 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr60 = V_ADD_CO_U32_e32 5120, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc ; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec ; GFX90A-NEXT: renamable $vgpr1, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE killed renamable $vgpr0_vgpr1, 1024, 0, implicit $exec :: (load (s8) from %ir.i49, addrspace 1) - ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 -1 - ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr36_sgpr37 + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_MOV_B64 -1 + ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = COPY renamable $sgpr36_sgpr37 ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr61, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $sgpr18_sgpr19, 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = V_CMP_EQ_U16_e64 0, killed $vgpr0, implicit $exec @@ -642,20 +643,20 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.47.Flow25: ; GFX90A-NEXT: successors: %bb.42(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr15, $vgpr17, $vgpr18, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr60_sgpr61, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr18_sgpr19, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_XOR_B64 $exec, -1, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr70_sgpr71, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_AND_B64 killed renamable $sgpr70_sgpr71, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr58_sgpr59 = S_AND_B64 killed renamable $sgpr68_sgpr69, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_AND_B64 killed renamable $sgpr66_sgpr67, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_AND_B64 killed renamable $sgpr50_sgpr51, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr60_sgpr61, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr64_sgpr65, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_AND_B64 killed renamable $sgpr48_sgpr49, $exec, implicit-def dead $scc ; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_ANDN2_B64 renamable $sgpr36_sgpr37, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_AND_B64 killed renamable $sgpr52_sgpr53, $exec, implicit-def dead $scc - ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr52_sgpr53, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_AND_B64 killed renamable $sgpr62_sgpr63, $exec, implicit-def dead $scc + ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_OR_B64 killed renamable $sgpr44_sgpr45, killed renamable $sgpr48_sgpr49, implicit-def dead $scc ; GFX90A-NEXT: S_BRANCH %bb.42 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.48.bb63: @@ -790,16 +791,16 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr15 = COPY killed renamable $sgpr23, implicit $exec ; GFX90A-NEXT: renamable $vgpr17 = COPY killed renamable $sgpr17, implicit $exec + ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr66_sgpr67 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr64_sgpr65 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr62_sgpr63 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr52_sgpr53 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0 - ; GFX90A-NEXT: renamable $sgpr46_sgpr47 = S_MOV_B64 0 + ; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vgpr8_vgpr9 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr6_vgpr7 = IMPLICIT_DEF ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = IMPLICIT_DEF @@ -883,7 +884,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.62.bb140: ; GFX90A-NEXT: successors: %bb.68(0x40000000), %bb.63(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 -1 ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc @@ -891,14 +892,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.63.Flow13: ; GFX90A-NEXT: successors: %bb.64(0x40000000), %bb.66(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr36_sgpr37, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.66, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.64.bb159: ; GFX90A-NEXT: successors: %bb.67(0x40000000), %bb.65(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vcc = V_CMP_NE_U32_e64 0, killed $vgpr30, implicit $exec ; GFX90A-NEXT: $sgpr12_sgpr13 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec @@ -907,21 +908,21 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.65.Flow10: ; GFX90A-NEXT: successors: %bb.66(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $sgpr12_sgpr13 = S_ANDN2_SAVEEXEC_B64 $sgpr12_sgpr13, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def $scc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.66.Flow14: ; GFX90A-NEXT: successors: %bb.8(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr68_sgpr69 = COPY $exec ; GFX90A-NEXT: S_BRANCH %bb.8 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.67.bb161: ; GFX90A-NEXT: successors: %bb.65(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr21, killed $vgpr23, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = V_OR_B32_e32 killed $vgpr2, killed $vgpr25, implicit $exec @@ -940,7 +941,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.68.bb174: ; GFX90A-NEXT: successors: %bb.72(0x40000000), %bb.69(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr26 = V_OR_B32_e32 1, $vgpr24, implicit $exec ; GFX90A-NEXT: renamable $vgpr48 = V_OR_B32_e32 $vgpr26, $vgpr22, implicit $exec @@ -956,14 +957,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.69.Flow: ; GFX90A-NEXT: successors: %bb.70(0x40000000), %bb.71(0x40000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.71, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.70.bb186: ; GFX90A-NEXT: successors: %bb.71(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 3, killed $vgpr2_vgpr3, implicit $exec ; GFX90A-NEXT: renamable $vgpr10 = COPY renamable $sgpr27, implicit $exec @@ -992,14 +993,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.71.Flow9: ; GFX90A-NEXT: successors: %bb.63(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $sgpr36_sgpr37 = S_MOV_B64 0 ; GFX90A-NEXT: S_BRANCH %bb.63 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.72.bb196: ; GFX90A-NEXT: successors: %bb.69(0x80000000) - ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000C, $vgpr12_vgpr13:0x000000000000000C, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr20_vgpr21:0x000000000000000C, $vgpr22_vgpr23:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x0000000000000003, $vgpr28_vgpr29:0x0000000000000003, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr10 = V_OR_B32_e32 $vgpr50, killed $vgpr16, implicit $exec ; GFX90A-NEXT: renamable $vgpr54 = V_OR_B32_e32 killed $vgpr10, killed $vgpr14, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll index 7c0c433ac3c51..83ab6c32aee96 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -909,39 +909,39 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: v_writelane_b32 v0, s35, 4 ; CHECK-NEXT: v_writelane_b32 v0, s36, 5 ; CHECK-NEXT: v_writelane_b32 v0, s37, 6 -; CHECK-NEXT: v_writelane_b32 v0, s46, 7 -; CHECK-NEXT: v_writelane_b32 v0, s47, 8 +; CHECK-NEXT: v_writelane_b32 v0, s38, 7 +; CHECK-NEXT: v_writelane_b32 v0, s39, 8 ; CHECK-NEXT: v_writelane_b32 v0, s48, 9 ; CHECK-NEXT: v_writelane_b32 v0, s49, 10 ; CHECK-NEXT: v_writelane_b32 v0, s50, 11 ; CHECK-NEXT: v_writelane_b32 v0, s51, 12 ; CHECK-NEXT: v_writelane_b32 v0, s52, 13 ; CHECK-NEXT: v_writelane_b32 v0, s53, 14 -; CHECK-NEXT: v_writelane_b32 v0, s62, 15 -; CHECK-NEXT: v_writelane_b32 v0, s63, 16 +; CHECK-NEXT: v_writelane_b32 v0, s54, 15 +; CHECK-NEXT: v_writelane_b32 v0, s55, 16 ; CHECK-NEXT: v_writelane_b32 v0, s64, 17 ; CHECK-NEXT: v_writelane_b32 v0, s65, 18 ; CHECK-NEXT: v_writelane_b32 v0, s66, 19 ; CHECK-NEXT: v_writelane_b32 v0, s67, 20 ; CHECK-NEXT: v_writelane_b32 v0, s68, 21 ; CHECK-NEXT: v_writelane_b32 v0, s69, 22 -; CHECK-NEXT: v_writelane_b32 v0, s78, 23 -; CHECK-NEXT: v_writelane_b32 v0, s79, 24 +; CHECK-NEXT: v_writelane_b32 v0, s70, 23 +; CHECK-NEXT: v_writelane_b32 v0, s71, 24 ; CHECK-NEXT: v_writelane_b32 v0, s80, 25 ; CHECK-NEXT: v_writelane_b32 v0, s81, 26 ; CHECK-NEXT: v_writelane_b32 v0, s82, 27 ; CHECK-NEXT: v_writelane_b32 v0, s83, 28 ; CHECK-NEXT: v_writelane_b32 v0, s84, 29 ; CHECK-NEXT: v_writelane_b32 v0, s85, 30 -; CHECK-NEXT: v_writelane_b32 v0, s94, 31 -; CHECK-NEXT: v_writelane_b32 v0, s95, 32 +; CHECK-NEXT: v_writelane_b32 v0, s86, 31 +; CHECK-NEXT: v_writelane_b32 v0, s87, 32 ; CHECK-NEXT: v_writelane_b32 v0, s96, 33 ; CHECK-NEXT: v_writelane_b32 v0, s97, 34 ; CHECK-NEXT: v_writelane_b32 v0, s98, 35 ; CHECK-NEXT: v_writelane_b32 v0, s99, 36 -; CHECK-NEXT: s_mov_b32 s38, s12 +; CHECK-NEXT: s_mov_b32 s40, s12 ; CHECK-NEXT: v_writelane_b32 v0, s100, 37 -; CHECK-NEXT: s_cmp_eq_u32 s38, 0 +; CHECK-NEXT: s_cmp_eq_u32 s40, 0 ; CHECK-NEXT: v_writelane_b32 v0, s101, 38 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: s_mov_b32 s0, 0 @@ -1602,32 +1602,32 @@ define void @spill_func(ptr addrspace(1) %arg) #0 { ; CHECK-NEXT: v_readlane_b32 s98, v0, 35 ; CHECK-NEXT: v_readlane_b32 s97, v0, 34 ; CHECK-NEXT: v_readlane_b32 s96, v0, 33 -; CHECK-NEXT: v_readlane_b32 s95, v0, 32 -; CHECK-NEXT: v_readlane_b32 s94, v0, 31 +; CHECK-NEXT: v_readlane_b32 s87, v0, 32 +; CHECK-NEXT: v_readlane_b32 s86, v0, 31 ; CHECK-NEXT: v_readlane_b32 s85, v0, 30 ; CHECK-NEXT: v_readlane_b32 s84, v0, 29 ; CHECK-NEXT: v_readlane_b32 s83, v0, 28 ; CHECK-NEXT: v_readlane_b32 s82, v0, 27 ; CHECK-NEXT: v_readlane_b32 s81, v0, 26 ; CHECK-NEXT: v_readlane_b32 s80, v0, 25 -; CHECK-NEXT: v_readlane_b32 s79, v0, 24 -; CHECK-NEXT: v_readlane_b32 s78, v0, 23 +; CHECK-NEXT: v_readlane_b32 s71, v0, 24 +; CHECK-NEXT: v_readlane_b32 s70, v0, 23 ; CHECK-NEXT: v_readlane_b32 s69, v0, 22 ; CHECK-NEXT: v_readlane_b32 s68, v0, 21 ; CHECK-NEXT: v_readlane_b32 s67, v0, 20 ; CHECK-NEXT: v_readlane_b32 s66, v0, 19 ; CHECK-NEXT: v_readlane_b32 s65, v0, 18 ; CHECK-NEXT: v_readlane_b32 s64, v0, 17 -; CHECK-NEXT: v_readlane_b32 s63, v0, 16 -; CHECK-NEXT: v_readlane_b32 s62, v0, 15 +; CHECK-NEXT: v_readlane_b32 s55, v0, 16 +; CHECK-NEXT: v_readlane_b32 s54, v0, 15 ; CHECK-NEXT: v_readlane_b32 s53, v0, 14 ; CHECK-NEXT: v_readlane_b32 s52, v0, 13 ; CHECK-NEXT: v_readlane_b32 s51, v0, 12 ; CHECK-NEXT: v_readlane_b32 s50, v0, 11 ; CHECK-NEXT: v_readlane_b32 s49, v0, 10 ; CHECK-NEXT: v_readlane_b32 s48, v0, 9 -; CHECK-NEXT: v_readlane_b32 s47, v0, 8 -; CHECK-NEXT: v_readlane_b32 s46, v0, 7 +; CHECK-NEXT: v_readlane_b32 s39, v0, 8 +; CHECK-NEXT: v_readlane_b32 s38, v0, 7 ; CHECK-NEXT: v_readlane_b32 s37, v0, 6 ; CHECK-NEXT: v_readlane_b32 s36, v0, 5 ; CHECK-NEXT: v_readlane_b32 s35, v0, 4 diff --git a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll index 394c32c8e4bcf..c10cb0ae6d336 100644 --- a/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/call-args-inreg.ll @@ -1385,15 +1385,15 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) # ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s29, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[38:39], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[40:41], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[38:39] +; GFX9-NEXT: s_mov_b64 exec, s[40:41] ; GFX9-NEXT: v_writelane_b32 v40, s29, 2 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: s_getpc_b64 s[38:39] -; GFX9-NEXT: s_add_u32 s38, s38, external_void_func_a15i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s39, s39, external_void_func_a15i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[40:41] +; GFX9-NEXT: s_add_u32 s40, s40, external_void_func_a15i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s41, s41, external_void_func_a15i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_mov_b32 s3, s19 ; GFX9-NEXT: s_mov_b32 s2, s18 ; GFX9-NEXT: s_mov_b32 s1, s17 @@ -1408,7 +1408,7 @@ define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) # ; GFX9-NEXT: s_mov_b32 s23, s27 ; GFX9-NEXT: s_mov_b32 s24, s28 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[38:39] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[40:41] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_mov_b32 s32, s33 diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll index 48f32a87203a3..2365c68a7cb0b 100644 --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -63,18 +63,18 @@ declare hidden void @external_void_func_v16i8(<16 x i8>) #0 define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; VI-LABEL: test_call_external_void_func_i1_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -82,18 +82,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_i1_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -101,18 +101,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_i1_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -155,18 +155,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s5 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s5 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: v_bfe_i32 v0, v0, 0, 1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -178,18 +178,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s5 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s5 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: v_bfe_i32 v0, v0, 0, 1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -201,18 +201,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s5 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -265,18 +265,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s5 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s5 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: v_and_b32_e32 v0, 1, v0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -288,18 +288,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s5 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s5 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: v_and_b32_e32 v0, 1, v0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -311,18 +311,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s5 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -370,18 +370,18 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; VI-LABEL: test_call_external_void_func_i8_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s5 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s5 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x7b ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -389,18 +389,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; ; CI-LABEL: test_call_external_void_func_i8_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s5 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s5 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 0x7b ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -408,18 +408,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { ; ; GFX9-LABEL: test_call_external_void_func_i8_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s5 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -463,18 +463,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s5 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s5 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm @@ -485,18 +485,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s5 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s5 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm @@ -507,18 +507,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s5 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -567,18 +567,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s5 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s5 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm @@ -589,18 +589,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s5 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s5 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm @@ -611,18 +611,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s5 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -667,18 +667,18 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; VI-LABEL: test_call_external_void_func_i16_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x7b ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -686,18 +686,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_i16_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 0x7b ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -705,18 +705,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_i16_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -759,18 +759,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s5 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s5 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm @@ -781,18 +781,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s5 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s5 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm @@ -803,18 +803,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s5 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -863,18 +863,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s5 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s5 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm @@ -885,18 +885,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc ; CI-NEXT: s_waitcnt vmcnt(0) -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s5 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s5 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm @@ -907,18 +907,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s5 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -963,18 +963,18 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { ; VI-LABEL: test_call_external_void_func_i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s5 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s5 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 42 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -982,18 +982,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { ; ; CI-LABEL: test_call_external_void_func_i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s5 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s5 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 42 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1001,18 +1001,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { ; ; GFX9-LABEL: test_call_external_void_func_i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s5 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1051,18 +1051,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; VI-LABEL: test_call_external_void_func_i64_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x7b ; VI-NEXT: v_mov_b32_e32 v1, 0 ; VI-NEXT: s_mov_b32 s32, 0 @@ -1071,18 +1071,18 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_i64_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 0x7b ; CI-NEXT: v_mov_b32_e32 v1, 0 ; CI-NEXT: s_mov_b32 s32, 0 @@ -1091,18 +1091,18 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_i64_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -1143,69 +1143,69 @@ define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; VI-LABEL: test_call_external_void_func_v2i64: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b32 s0, 0 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_mov_b32 s1, s0 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s49, s49, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2i64: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b32 s0, 0 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_mov_b32 s1, s0 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s49, s49, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_v2i64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX9-NEXT: s_mov_b32 s0, 0 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -1252,18 +1252,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; VI-LABEL: test_call_external_void_func_v2i64_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: v_mov_b32_e32 v2, 3 @@ -1274,18 +1274,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v2i64_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 @@ -1296,18 +1296,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v2i64_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -1353,23 +1353,23 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; VI-LABEL: test_call_external_void_func_v3i64: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b32 s0, 0 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_mov_b32 s1, s0 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s49, s49, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v4, 1 ; VI-NEXT: v_mov_b32_e32 v5, 2 ; VI-NEXT: s_mov_b32 s32, 0 @@ -1378,23 +1378,23 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; ; CI-LABEL: test_call_external_void_func_v3i64: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b32 s0, 0 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_mov_b32 s1, s0 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s49, s49, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v4, 1 ; CI-NEXT: v_mov_b32_e32 v5, 2 ; CI-NEXT: s_mov_b32 s32, 0 @@ -1403,23 +1403,23 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3i64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX9-NEXT: s_mov_b32 s0, 0 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -1473,23 +1473,23 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; VI-LABEL: test_call_external_void_func_v4i64: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_mov_b32 s0, 0 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_mov_b32 s1, s0 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s49, s49, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v4, 1 ; VI-NEXT: v_mov_b32_e32 v5, 2 ; VI-NEXT: v_mov_b32_e32 v6, 3 @@ -1500,23 +1500,23 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; ; CI-LABEL: test_call_external_void_func_v4i64: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_mov_b32 s0, 0 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_mov_b32 s1, s0 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s49, s49, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v4, 1 ; CI-NEXT: v_mov_b32_e32 v5, 2 ; CI-NEXT: v_mov_b32_e32 v6, 3 @@ -1527,23 +1527,23 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v4i64: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX9-NEXT: s_mov_b32 s0, 0 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 ; GFX9-NEXT: v_mov_b32_e32 v6, 3 @@ -1601,18 +1601,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; VI-LABEL: test_call_external_void_func_f16_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x4400 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1620,18 +1620,18 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_f16_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 4.0 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1639,18 +1639,18 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_f16_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1689,18 +1689,18 @@ define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; VI-LABEL: test_call_external_void_func_f32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 4.0 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1708,18 +1708,18 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_f32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 4.0 ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1727,18 +1727,18 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_f32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -1777,18 +1777,18 @@ define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v2f32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1.0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: s_mov_b32 s32, 0 @@ -1797,18 +1797,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v2f32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1.0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: s_mov_b32 s32, 0 @@ -1817,18 +1817,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v2f32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -1869,18 +1869,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v3f32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1.0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: v_mov_b32_e32 v2, 4.0 @@ -1890,18 +1890,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v3f32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1.0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 4.0 @@ -1911,18 +1911,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3f32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 @@ -1966,18 +1966,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v5f32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1.0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: v_mov_b32_e32 v2, 4.0 @@ -1989,18 +1989,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v5f32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1.0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 4.0 @@ -2012,18 +2012,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v5f32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 @@ -2072,18 +2072,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; VI-LABEL: test_call_external_void_func_f64_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 0x40100000 ; VI-NEXT: s_mov_b32 s32, 0 @@ -2092,18 +2092,18 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_f64_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 0x40100000 ; CI-NEXT: s_mov_b32 s32, 0 @@ -2112,18 +2112,18 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_f64_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -2164,18 +2164,18 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; VI-LABEL: test_call_external_void_func_v2f64_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: v_mov_b32_e32 v2, 0 @@ -2186,18 +2186,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v2f64_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 0 @@ -2208,18 +2208,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v2f64_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -2265,18 +2265,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; VI-LABEL: test_call_external_void_func_v3f64_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0 ; VI-NEXT: v_mov_b32_e32 v1, 2.0 ; VI-NEXT: v_mov_b32_e32 v2, 0 @@ -2289,18 +2289,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v3f64_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 0 @@ -2313,18 +2313,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3f64_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -2375,42 +2375,42 @@ define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; VI-LABEL: test_call_external_void_func_v2i16: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2i16: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 @@ -2419,21 +2419,21 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v2i16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -2476,42 +2476,42 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; VI-LABEL: test_call_external_void_func_v3i16: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v3i16: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_alignbit_b32 v1, v3, v2, 16 @@ -2522,21 +2522,21 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3i16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -2579,42 +2579,42 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; VI-LABEL: test_call_external_void_func_v3f16: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v3f16: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_cvt_f32_f16_e32 v0, v1 @@ -2626,21 +2626,21 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3f16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -2683,18 +2683,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { ; VI-LABEL: test_call_external_void_func_v3i16_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x20001 ; VI-NEXT: v_mov_b32_e32 v1, 3 ; VI-NEXT: s_mov_b32 s32, 0 @@ -2703,18 +2703,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v3i16_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 @@ -2724,18 +2724,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3i16_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 3 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -2776,18 +2776,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { ; VI-LABEL: test_call_external_void_func_v3f16_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; VI-NEXT: v_mov_b32_e32 v1, 0x4400 ; VI-NEXT: s_mov_b32 s32, 0 @@ -2796,18 +2796,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v3f16_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1.0 ; CI-NEXT: v_mov_b32_e32 v1, 2.0 ; CI-NEXT: v_mov_b32_e32 v2, 4.0 @@ -2817,18 +2817,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3f16_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -2870,42 +2870,42 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; VI-LABEL: test_call_external_void_func_v4i16: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v4i16: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0 @@ -2917,21 +2917,21 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v4i16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -2974,18 +2974,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; VI-LABEL: test_call_external_void_func_v4i16_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 0x20001 ; VI-NEXT: v_mov_b32_e32 v1, 0x40003 ; VI-NEXT: s_mov_b32 s32, 0 @@ -2994,18 +2994,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v4i16_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 @@ -3016,18 +3016,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v4i16_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -3069,42 +3069,42 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; VI-LABEL: test_call_external_void_func_v2f16: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2f16: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_cvt_f32_f16_e32 v0, v1 @@ -3115,21 +3115,21 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v2f16: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -3172,63 +3172,63 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; VI-LABEL: test_call_external_void_func_v2i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v2i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_v2i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -3271,18 +3271,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v2i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: s_mov_b32 s32, 0 @@ -3291,18 +3291,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v2i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: s_mov_b32 s32, 0 @@ -3311,18 +3311,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v2i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: s_mov_b32 s32, 0 @@ -3363,18 +3363,18 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; VI-LABEL: test_call_external_void_func_v3i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s5 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s5 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 3 ; VI-NEXT: v_mov_b32_e32 v1, 4 ; VI-NEXT: v_mov_b32_e32 v2, 5 @@ -3384,18 +3384,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; ; CI-LABEL: test_call_external_void_func_v3i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s5 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s5 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 3 ; CI-NEXT: v_mov_b32_e32 v1, 4 ; CI-NEXT: v_mov_b32_e32 v2, 5 @@ -3405,18 +3405,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s5 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 @@ -3460,18 +3460,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; VI-LABEL: test_call_external_void_func_v3i32_i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s5 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s5 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 3 ; VI-NEXT: v_mov_b32_e32 v1, 4 ; VI-NEXT: v_mov_b32_e32 v2, 5 @@ -3482,18 +3482,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; ; CI-LABEL: test_call_external_void_func_v3i32_i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s5 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s5 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 3 ; CI-NEXT: v_mov_b32_e32 v1, 4 ; CI-NEXT: v_mov_b32_e32 v2, 5 @@ -3504,18 +3504,18 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { ; ; GFX9-LABEL: test_call_external_void_func_v3i32_i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s5 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 3 ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 @@ -3561,63 +3561,63 @@ define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; VI-LABEL: test_call_external_void_func_v4i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v4i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_v4i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -3660,40 +3660,40 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v4i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: v_mov_b32_e32 v2, 3 ; VI-NEXT: v_mov_b32_e32 v3, 4 ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: s_endpgm -; -; CI-LABEL: test_call_external_void_func_v4i32_imm: -; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_endpgm +; +; CI-LABEL: test_call_external_void_func_v4i32_imm: +; CI: ; %bb.0: +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 @@ -3704,18 +3704,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v4i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -3761,18 +3761,18 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v5i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: v_mov_b32_e32 v2, 3 @@ -3784,18 +3784,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v5i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 @@ -3807,18 +3807,18 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v5i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -3867,72 +3867,72 @@ define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; VI-LABEL: test_call_external_void_func_v8i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 -; VI-NEXT: s_addc_u32 s49, s49, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v8i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 -; CI-NEXT: s_addc_u32 s49, s49, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_v8i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -3983,18 +3983,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; VI-LABEL: test_call_external_void_func_v8i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: v_mov_b32_e32 v0, 1 ; VI-NEXT: v_mov_b32_e32 v1, 2 ; VI-NEXT: v_mov_b32_e32 v2, 3 @@ -4009,18 +4009,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; ; CI-LABEL: test_call_external_void_func_v8i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: v_mov_b32_e32 v0, 1 ; CI-NEXT: v_mov_b32_e32 v1, 2 ; CI-NEXT: v_mov_b32_e32 v2, 3 @@ -4035,18 +4035,18 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v8i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_mov_b32_e32 v2, 3 @@ -4102,13 +4102,13 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; VI-LABEL: test_call_external_void_func_v16i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) @@ -4116,25 +4116,25 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 ; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 -; VI-NEXT: s_addc_u32 s49, s49, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v16i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) @@ -4142,25 +4142,25 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 ; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 -; CI-NEXT: s_addc_u32 s49, s49, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_v16i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -4168,12 +4168,12 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 ; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 ; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -4231,8 +4231,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 @@ -4242,19 +4242,19 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 ; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_getpc_b64 s[8:9] ; VI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_waitcnt vmcnt(7) -; VI-NEXT: buffer_store_dword v31, off, s[48:51], s32 +; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32 ; VI-NEXT: s_swappc_b64 s[30:31], s[8:9] ; VI-NEXT: s_endpgm ; @@ -4263,8 +4263,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 @@ -4274,19 +4274,19 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 ; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_getpc_b64 s[8:9] ; CI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_waitcnt vmcnt(7) -; CI-NEXT: buffer_store_dword v31, off, s[48:51], s32 +; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32 ; CI-NEXT: s_swappc_b64 s[30:31], s[8:9] ; CI-NEXT: s_endpgm ; @@ -4295,8 +4295,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 @@ -4306,19 +4306,19 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 ; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_getpc_b64 s[8:9] ; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_waitcnt vmcnt(7) -; GFX9-NEXT: buffer_store_dword v31, off, s[48:51], s32 +; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GFX9-NEXT: s_endpgm ; @@ -4384,15 +4384,15 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; VI-LABEL: test_call_external_void_func_v32i32_i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s5 ; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_dword v32, off, s[4:7], 0 ; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 @@ -4404,30 +4404,30 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_waitcnt vmcnt(8) -; VI-NEXT: buffer_store_dword v32, off, s[48:51], s32 offset:4 +; VI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 ; VI-NEXT: s_waitcnt vmcnt(8) -; VI-NEXT: buffer_store_dword v31, off, s[48:51], s32 +; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_v32i32_i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s5 ; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; CI-NEXT: s_mov_b32 s7, 0xf000 ; CI-NEXT: s_mov_b32 s6, -1 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dword v32, off, s[4:7], 0 ; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 @@ -4439,30 +4439,30 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_waitcnt vmcnt(8) -; CI-NEXT: buffer_store_dword v32, off, s[48:51], s32 offset:4 +; CI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 ; CI-NEXT: s_waitcnt vmcnt(8) -; CI-NEXT: buffer_store_dword v31, off, s[48:51], s32 +; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_v32i32_i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s5 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_load_dword v32, off, s[4:7], 0 ; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 @@ -4474,16 +4474,16 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 ; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_waitcnt vmcnt(8) -; GFX9-NEXT: buffer_store_dword v32, off, s[48:51], s32 offset:4 +; GFX9-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(8) -; GFX9-NEXT: buffer_store_dword v31, off, s[48:51], s32 +; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm ; @@ -4557,89 +4557,89 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 { ; VI-LABEL: test_call_external_i32_func_i32_imm: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s66, -1 -; VI-NEXT: s_mov_b32 s67, 0xe80000 -; VI-NEXT: s_add_u32 s64, s64, s5 -; VI-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0x24 -; VI-NEXT: s_addc_u32 s65, s65, 0 +; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s51, 0xe80000 +; VI-NEXT: s_add_u32 s48, s48, s5 +; VI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24 +; VI-NEXT: s_addc_u32 s49, s49, 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[64:65] +; VI-NEXT: s_mov_b64 s[0:1], s[48:49] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[66:67] +; VI-NEXT: s_mov_b64 s[2:3], s[50:51] ; VI-NEXT: v_mov_b32_e32 v0, 42 ; VI-NEXT: s_mov_b32 s32, 0 -; VI-NEXT: s_mov_b32 s51, 0xf000 -; VI-NEXT: s_mov_b32 s50, -1 +; VI-NEXT: s_mov_b32 s39, 0xf000 +; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_i32_func_i32_imm: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s66, -1 -; CI-NEXT: s_mov_b32 s67, 0xe8f000 -; CI-NEXT: s_add_u32 s64, s64, s5 -; CI-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0x9 -; CI-NEXT: s_addc_u32 s65, s65, 0 +; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s51, 0xe8f000 +; CI-NEXT: s_add_u32 s48, s48, s5 +; CI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x9 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[64:65] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[66:67] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: v_mov_b32_e32 v0, 42 ; CI-NEXT: s_mov_b32 s32, 0 -; CI-NEXT: s_mov_b32 s51, 0xf000 -; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s39, 0xf000 +; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_i32_func_i32_imm: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s66, -1 -; GFX9-NEXT: s_mov_b32 s67, 0xe00000 -; GFX9-NEXT: s_add_u32 s64, s64, s5 -; GFX9-NEXT: s_load_dwordx2 s[48:49], s[2:3], 0x24 -; GFX9-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s5 +; GFX9-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_mov_b32 s51, 0xf000 -; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xf000 +; GFX9-NEXT: s_mov_b32 s38, -1 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm ; ; GFX11-LABEL: test_call_external_i32_func_i32_imm: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b64 s[48:49], s[2:3], 0x24 +; GFX11-NEXT: s_load_b64 s[36:37], s[2:3], 0x24 ; GFX11-NEXT: v_mov_b32_e32 v0, 42 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_i32_func_i32@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_i32_func_i32@rel32@hi+12 ; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX11-NEXT: s_mov_b32 s32, 0 -; GFX11-NEXT: s_mov_b32 s51, 0x31016000 -; GFX11-NEXT: s_mov_b32 s50, -1 +; GFX11-NEXT: s_mov_b32 s39, 0x31016000 +; GFX11-NEXT: s_mov_b32 s38, -1 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: buffer_store_b32 v0, off, s[48:51], 0 dlc +; GFX11-NEXT: buffer_store_b32 v0, off, s[36:39], 0 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_endpgm ; @@ -4647,7 +4647,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s8, s8, s11 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 -; HSA-NEXT: s_load_dwordx2 s[48:49], s[6:7], 0x0 +; HSA-NEXT: s_load_dwordx2 s[36:37], s[6:7], 0x0 ; HSA-NEXT: s_add_u32 s0, s0, s11 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 @@ -4657,10 +4657,10 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) ; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] ; HSA-NEXT: v_mov_b32_e32 v0, 42 ; HSA-NEXT: s_mov_b32 s32, 0 -; HSA-NEXT: s_mov_b32 s51, 0x1100f000 -; HSA-NEXT: s_mov_b32 s50, -1 +; HSA-NEXT: s_mov_b32 s39, 0x1100f000 +; HSA-NEXT: s_mov_b32 s38, -1 ; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] -; HSA-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; HSA-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; HSA-NEXT: s_waitcnt vmcnt(0) ; HSA-NEXT: s_endpgm %val = call i32 @external_i32_func_i32(i32 42) @@ -4671,72 +4671,72 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; VI-LABEL: test_call_external_void_func_struct_i8_i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; VI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 -; VI-NEXT: s_addc_u32 s49, s49, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_struct_i8_i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 -; CI-NEXT: s_addc_u32 s49, s49, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_struct_i8_i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm @@ -4787,86 +4787,86 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; VI-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: v_mov_b32_e32 v0, 3 -; VI-NEXT: buffer_store_byte v0, off, s[48:51], 0 +; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; VI-NEXT: v_mov_b32_e32 v0, 8 -; VI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4 -; VI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4 -; VI-NEXT: buffer_load_dword v1, off, s[48:51], 0 +; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 +; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 +; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_movk_i32 s32, 0x400 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4 +; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 ; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: buffer_store_dword v1, off, s[48:51], s32 +; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; VI-NEXT: s_endpgm ; ; CI-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: v_mov_b32_e32 v0, 3 -; CI-NEXT: buffer_store_byte v0, off, s[48:51], 0 +; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; CI-NEXT: v_mov_b32_e32 v0, 8 -; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4 -; CI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4 -; CI-NEXT: buffer_load_dword v1, off, s[48:51], 0 +; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 +; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 +; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_movk_i32 s32, 0x400 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_waitcnt vmcnt(1) -; CI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4 +; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 ; CI-NEXT: s_waitcnt vmcnt(1) -; CI-NEXT: buffer_store_dword v1, off, s[48:51], s32 +; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 3 -; GFX9-NEXT: buffer_store_byte v0, off, s[48:51], 0 +; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 -; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 +; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_movk_i32 s32, 0x400 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4 +; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], s32 +; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: s_endpgm ; @@ -4923,33 +4923,33 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 { ; VI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s5 -; VI-NEXT: s_addc_u32 s49, s49, 0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s5 +; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: v_mov_b32_e32 v0, 3 -; VI-NEXT: buffer_store_byte v0, off, s[48:51], 0 +; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; VI-NEXT: v_mov_b32_e32 v0, 8 -; VI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4 -; VI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4 -; VI-NEXT: buffer_load_dword v1, off, s[48:51], 0 +; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 +; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 +; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; VI-NEXT: s_movk_i32 s32, 0x800 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4 +; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 ; VI-NEXT: s_waitcnt vmcnt(1) -; VI-NEXT: buffer_store_dword v1, off, s[48:51], s32 +; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32 ; VI-NEXT: v_mov_b32_e32 v0, 8 ; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; VI-NEXT: buffer_load_ubyte v0, off, s[48:51], 0 offset:8 -; VI-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:12 +; VI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 +; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt vmcnt(1) @@ -4961,33 +4961,33 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; ; CI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s5 -; CI-NEXT: s_addc_u32 s49, s49, 0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s5 +; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: v_mov_b32_e32 v0, 3 -; CI-NEXT: buffer_store_byte v0, off, s[48:51], 0 +; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; CI-NEXT: v_mov_b32_e32 v0, 8 -; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4 -; CI-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4 -; CI-NEXT: buffer_load_dword v1, off, s[48:51], 0 +; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 +; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 +; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; CI-NEXT: s_movk_i32 s32, 0x800 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_waitcnt vmcnt(1) -; CI-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4 +; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 ; CI-NEXT: s_waitcnt vmcnt(1) -; CI-NEXT: buffer_store_dword v1, off, s[48:51], s32 +; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32 ; CI-NEXT: v_mov_b32_e32 v0, 8 ; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CI-NEXT: buffer_load_ubyte v0, off, s[48:51], 0 offset:8 -; CI-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:12 +; CI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 +; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt vmcnt(1) @@ -4999,34 +4999,34 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; ; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s5 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s5 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 3 -; GFX9-NEXT: buffer_store_byte v0, off, s[48:51], 0 +; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 -; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], 0 offset:4 -; GFX9-NEXT: buffer_load_dword v0, off, s[48:51], 0 offset:4 +; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 +; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 +; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v0, off, s[48:51], s32 offset:4 +; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: buffer_store_dword v1, off, s[48:51], s32 +; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: buffer_load_ubyte v0, off, s[48:51], 0 offset:8 -; GFX9-NEXT: buffer_load_dword v1, off, s[48:51], 0 offset:12 +; GFX9-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 +; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt vmcnt(1) @@ -5121,23 +5121,23 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; VI-LABEL: test_call_external_void_func_v16i8: ; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; VI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s50, -1 -; VI-NEXT: s_mov_b32 s51, 0xe80000 -; VI-NEXT: s_add_u32 s48, s48, s3 +; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s38, -1 +; VI-NEXT: s_mov_b32 s39, 0xe80000 +; VI-NEXT: s_add_u32 s36, s36, s3 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; VI-NEXT: s_addc_u32 s49, s49, 0 -; VI-NEXT: s_mov_b64 s[0:1], s[48:49] +; VI-NEXT: s_addc_u32 s37, s37, 0 +; VI-NEXT: s_mov_b64 s[0:1], s[36:37] ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[50:51] +; VI-NEXT: s_mov_b64 s[2:3], s[38:39] ; VI-NEXT: s_mov_b32 s32, 0 ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_lshrrev_b32_e32 v16, 8, v0 @@ -5163,23 +5163,23 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; ; CI-LABEL: test_call_external_void_func_v16i8: ; CI: ; %bb.0: -; CI-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] ; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; CI-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s50, -1 -; CI-NEXT: s_mov_b32 s51, 0xe8f000 -; CI-NEXT: s_add_u32 s48, s48, s3 +; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s38, -1 +; CI-NEXT: s_mov_b32 s39, 0xe8f000 +; CI-NEXT: s_add_u32 s36, s36, s3 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; CI-NEXT: s_addc_u32 s49, s49, 0 -; CI-NEXT: s_mov_b64 s[0:1], s[48:49] +; CI-NEXT: s_addc_u32 s37, s37, 0 +; CI-NEXT: s_mov_b64 s[0:1], s[36:37] ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[50:51] +; CI-NEXT: s_mov_b64 s[2:3], s[38:39] ; CI-NEXT: s_mov_b32 s32, 0 ; CI-NEXT: s_waitcnt vmcnt(0) ; CI-NEXT: v_lshrrev_b32_e32 v16, 8, v0 @@ -5205,23 +5205,23 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; ; GFX9-LABEL: test_call_external_void_func_v16i8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s3 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s3 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_addc_u32 s37, s37, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0 @@ -5324,29 +5324,29 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 { ; VI-LABEL: stack_passed_arg_alignment_v32i32_f64: ; VI: ; %bb.0: ; %entry -; VI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s66, -1 -; VI-NEXT: s_mov_b32 s67, 0xe80000 -; VI-NEXT: s_add_u32 s64, s64, s5 +; VI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 +; VI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 +; VI-NEXT: s_mov_b32 s54, -1 +; VI-NEXT: s_mov_b32 s55, 0xe80000 +; VI-NEXT: s_add_u32 s52, s52, s5 ; VI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64 ; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4 ; VI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24 ; VI-NEXT: s_mov_b32 s32, 0 -; VI-NEXT: s_addc_u32 s65, s65, 0 +; VI-NEXT: s_addc_u32 s53, s53, 0 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s23 -; VI-NEXT: buffer_store_dword v0, off, s[64:67], s32 +; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 ; VI-NEXT: v_mov_b32_e32 v0, s4 -; VI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:4 +; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 ; VI-NEXT: v_mov_b32_e32 v0, s5 ; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b64 s[0:1], s[64:65] -; VI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:8 +; VI-NEXT: s_mov_b64 s[0:1], s[52:53] +; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 ; VI-NEXT: s_getpc_b64 s[4:5] ; VI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 ; VI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 -; VI-NEXT: s_mov_b64 s[2:3], s[66:67] +; VI-NEXT: s_mov_b64 s[2:3], s[54:55] ; VI-NEXT: v_mov_b32_e32 v0, s36 ; VI-NEXT: v_mov_b32_e32 v1, s37 ; VI-NEXT: v_mov_b32_e32 v2, s38 @@ -5383,29 +5383,29 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; ; CI-LABEL: stack_passed_arg_alignment_v32i32_f64: ; CI: ; %bb.0: ; %entry -; CI-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; CI-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; CI-NEXT: s_mov_b32 s66, -1 -; CI-NEXT: s_mov_b32 s67, 0xe8f000 -; CI-NEXT: s_add_u32 s64, s64, s5 +; CI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 +; CI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 +; CI-NEXT: s_mov_b32 s54, -1 +; CI-NEXT: s_mov_b32 s55, 0xe8f000 +; CI-NEXT: s_add_u32 s52, s52, s5 ; CI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x19 ; CI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x29 ; CI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x9 ; CI-NEXT: s_mov_b32 s32, 0 -; CI-NEXT: s_addc_u32 s65, s65, 0 +; CI-NEXT: s_addc_u32 s53, s53, 0 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_mov_b32_e32 v0, s23 -; CI-NEXT: buffer_store_dword v0, off, s[64:67], s32 +; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 ; CI-NEXT: v_mov_b32_e32 v0, s4 -; CI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:4 +; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 ; CI-NEXT: v_mov_b32_e32 v0, s5 ; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b64 s[0:1], s[64:65] -; CI-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:8 +; CI-NEXT: s_mov_b64 s[0:1], s[52:53] +; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 ; CI-NEXT: s_getpc_b64 s[4:5] ; CI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 ; CI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 -; CI-NEXT: s_mov_b64 s[2:3], s[66:67] +; CI-NEXT: s_mov_b64 s[2:3], s[54:55] ; CI-NEXT: v_mov_b32_e32 v0, s36 ; CI-NEXT: v_mov_b32_e32 v1, s37 ; CI-NEXT: v_mov_b32_e32 v2, s38 @@ -5442,29 +5442,29 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; ; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s66, -1 -; GFX9-NEXT: s_mov_b32 s67, 0xe00000 -; GFX9-NEXT: s_add_u32 s64, s64, s5 +; GFX9-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s54, -1 +; GFX9-NEXT: s_mov_b32 s55, 0xe00000 +; GFX9-NEXT: s_add_u32 s52, s52, s5 ; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4 ; GFX9-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24 ; GFX9-NEXT: s_mov_b32 s32, 0 -; GFX9-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-NEXT: s_addc_u32 s53, s53, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s23 -; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], s32 +; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:4 +; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 ; GFX9-NEXT: v_mov_b32_e32 v0, s5 ; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], s32 offset:8 +; GFX9-NEXT: s_mov_b64 s[0:1], s[52:53] +; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 -; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-NEXT: s_mov_b64 s[2:3], s[54:55] ; GFX9-NEXT: v_mov_b32_e32 v0, s36 ; GFX9-NEXT: v_mov_b32_e32 v1, s37 ; GFX9-NEXT: v_mov_b32_e32 v2, s38 diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index 002e82f676e8b..9561aa555c80e 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -255,24 +255,24 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] ; MUBUF-NEXT: v_writelane_b32 v40, s36, 0 ; MUBUF-NEXT: v_writelane_b32 v40, s37, 1 -; MUBUF-NEXT: v_writelane_b32 v40, s46, 2 -; MUBUF-NEXT: v_writelane_b32 v40, s47, 3 +; MUBUF-NEXT: v_writelane_b32 v40, s38, 2 +; MUBUF-NEXT: v_writelane_b32 v40, s39, 3 ; MUBUF-NEXT: v_writelane_b32 v40, s48, 4 ; MUBUF-NEXT: v_writelane_b32 v40, s49, 5 ; MUBUF-NEXT: v_writelane_b32 v40, s50, 6 ; MUBUF-NEXT: v_writelane_b32 v40, s51, 7 ; MUBUF-NEXT: v_writelane_b32 v40, s52, 8 ; MUBUF-NEXT: v_writelane_b32 v40, s53, 9 -; MUBUF-NEXT: v_writelane_b32 v40, s62, 10 -; MUBUF-NEXT: v_writelane_b32 v40, s63, 11 +; MUBUF-NEXT: v_writelane_b32 v40, s54, 10 +; MUBUF-NEXT: v_writelane_b32 v40, s55, 11 ; MUBUF-NEXT: v_writelane_b32 v40, s64, 12 ; MUBUF-NEXT: v_writelane_b32 v40, s65, 13 ; MUBUF-NEXT: v_writelane_b32 v40, s66, 14 ; MUBUF-NEXT: v_writelane_b32 v40, s67, 15 ; MUBUF-NEXT: v_writelane_b32 v40, s68, 16 ; MUBUF-NEXT: v_writelane_b32 v40, s69, 17 -; MUBUF-NEXT: v_writelane_b32 v40, s78, 18 -; MUBUF-NEXT: v_writelane_b32 v40, s79, 19 +; MUBUF-NEXT: v_writelane_b32 v40, s70, 18 +; MUBUF-NEXT: v_writelane_b32 v40, s71, 19 ; MUBUF-NEXT: v_writelane_b32 v40, s80, 20 ; MUBUF-NEXT: v_writelane_b32 v40, s81, 21 ; MUBUF-NEXT: v_writelane_b32 v40, s82, 22 @@ -327,24 +327,24 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; MUBUF-NEXT: v_readlane_b32 s82, v40, 22 ; MUBUF-NEXT: v_readlane_b32 s81, v40, 21 ; MUBUF-NEXT: v_readlane_b32 s80, v40, 20 -; MUBUF-NEXT: v_readlane_b32 s79, v40, 19 -; MUBUF-NEXT: v_readlane_b32 s78, v40, 18 +; MUBUF-NEXT: v_readlane_b32 s71, v40, 19 +; MUBUF-NEXT: v_readlane_b32 s70, v40, 18 ; MUBUF-NEXT: v_readlane_b32 s69, v40, 17 ; MUBUF-NEXT: v_readlane_b32 s68, v40, 16 ; MUBUF-NEXT: v_readlane_b32 s67, v40, 15 ; MUBUF-NEXT: v_readlane_b32 s66, v40, 14 ; MUBUF-NEXT: v_readlane_b32 s65, v40, 13 ; MUBUF-NEXT: v_readlane_b32 s64, v40, 12 -; MUBUF-NEXT: v_readlane_b32 s63, v40, 11 -; MUBUF-NEXT: v_readlane_b32 s62, v40, 10 +; MUBUF-NEXT: v_readlane_b32 s55, v40, 11 +; MUBUF-NEXT: v_readlane_b32 s54, v40, 10 ; MUBUF-NEXT: v_readlane_b32 s53, v40, 9 ; MUBUF-NEXT: v_readlane_b32 s52, v40, 8 ; MUBUF-NEXT: v_readlane_b32 s51, v40, 7 ; MUBUF-NEXT: v_readlane_b32 s50, v40, 6 ; MUBUF-NEXT: v_readlane_b32 s49, v40, 5 ; MUBUF-NEXT: v_readlane_b32 s48, v40, 4 -; MUBUF-NEXT: v_readlane_b32 s47, v40, 3 -; MUBUF-NEXT: v_readlane_b32 s46, v40, 2 +; MUBUF-NEXT: v_readlane_b32 s39, v40, 3 +; MUBUF-NEXT: v_readlane_b32 s38, v40, 2 ; MUBUF-NEXT: v_readlane_b32 s37, v40, 1 ; MUBUF-NEXT: v_readlane_b32 s36, v40, 0 ; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -363,22 +363,20 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; FLATSCR-NEXT: v_writelane_b32 v40, s31, 1 ; FLATSCR-NEXT: v_writelane_b32 v40, s36, 2 ; FLATSCR-NEXT: v_writelane_b32 v40, s37, 3 -; FLATSCR-NEXT: v_writelane_b32 v40, s46, 4 -; FLATSCR-NEXT: v_writelane_b32 v40, s47, 5 +; FLATSCR-NEXT: v_writelane_b32 v40, s38, 4 +; FLATSCR-NEXT: v_writelane_b32 v40, s39, 5 ; FLATSCR-NEXT: v_writelane_b32 v40, s48, 6 ; FLATSCR-NEXT: v_writelane_b32 v40, s49, 7 ; FLATSCR-NEXT: v_writelane_b32 v40, s50, 8 ; FLATSCR-NEXT: v_writelane_b32 v40, s51, 9 ; FLATSCR-NEXT: v_writelane_b32 v40, s52, 10 ; FLATSCR-NEXT: v_writelane_b32 v40, s53, 11 -; FLATSCR-NEXT: v_writelane_b32 v40, s62, 12 -; FLATSCR-NEXT: v_writelane_b32 v40, s63, 13 +; FLATSCR-NEXT: v_writelane_b32 v40, s54, 12 +; FLATSCR-NEXT: v_writelane_b32 v40, s55, 13 ; FLATSCR-NEXT: v_writelane_b32 v40, s64, 14 ; FLATSCR-NEXT: v_writelane_b32 v40, s65, 15 ; FLATSCR-NEXT: v_writelane_b32 v40, s66, 16 ; FLATSCR-NEXT: v_writelane_b32 v40, s67, 17 -; FLATSCR-NEXT: v_writelane_b32 v40, s68, 18 -; FLATSCR-NEXT: v_writelane_b32 v40, s69, 19 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART @@ -402,10 +400,10 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; FLATSCR-NEXT: ; def s[0:15] ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; def s[68:75] +; FLATSCR-NEXT: ; def s[72:79] ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; def s[76:77] +; FLATSCR-NEXT: ; def s[88:89] ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s[52:67] @@ -417,30 +415,28 @@ define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { ; FLATSCR-NEXT: ; use s[16:31] ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; use s[68:75] +; FLATSCR-NEXT: ; use s[72:79] ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; use s[76:77] +; FLATSCR-NEXT: ; use s[88:89] ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; use s[0:15] ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s69, v40, 19 -; FLATSCR-NEXT: v_readlane_b32 s68, v40, 18 ; FLATSCR-NEXT: v_readlane_b32 s67, v40, 17 ; FLATSCR-NEXT: v_readlane_b32 s66, v40, 16 ; FLATSCR-NEXT: v_readlane_b32 s65, v40, 15 ; FLATSCR-NEXT: v_readlane_b32 s64, v40, 14 -; FLATSCR-NEXT: v_readlane_b32 s63, v40, 13 -; FLATSCR-NEXT: v_readlane_b32 s62, v40, 12 +; FLATSCR-NEXT: v_readlane_b32 s55, v40, 13 +; FLATSCR-NEXT: v_readlane_b32 s54, v40, 12 ; FLATSCR-NEXT: v_readlane_b32 s53, v40, 11 ; FLATSCR-NEXT: v_readlane_b32 s52, v40, 10 ; FLATSCR-NEXT: v_readlane_b32 s51, v40, 9 ; FLATSCR-NEXT: v_readlane_b32 s50, v40, 8 ; FLATSCR-NEXT: v_readlane_b32 s49, v40, 7 ; FLATSCR-NEXT: v_readlane_b32 s48, v40, 6 -; FLATSCR-NEXT: v_readlane_b32 s47, v40, 5 -; FLATSCR-NEXT: v_readlane_b32 s46, v40, 4 +; FLATSCR-NEXT: v_readlane_b32 s39, v40, 5 +; FLATSCR-NEXT: v_readlane_b32 s38, v40, 4 ; FLATSCR-NEXT: v_readlane_b32 s37, v40, 3 ; FLATSCR-NEXT: v_readlane_b32 s36, v40, 2 ; FLATSCR-NEXT: v_readlane_b32 s31, v40, 1 @@ -541,83 +537,79 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] -; MUBUF-NEXT: v_writelane_b32 v1, s46, 0 -; MUBUF-NEXT: v_writelane_b32 v1, s47, 1 -; MUBUF-NEXT: v_writelane_b32 v1, s48, 2 -; MUBUF-NEXT: v_writelane_b32 v1, s49, 3 -; MUBUF-NEXT: v_writelane_b32 v1, s50, 4 -; MUBUF-NEXT: v_writelane_b32 v1, s51, 5 -; MUBUF-NEXT: v_writelane_b32 v1, s52, 6 -; MUBUF-NEXT: v_writelane_b32 v1, s53, 7 -; MUBUF-NEXT: v_writelane_b32 v1, s62, 8 -; MUBUF-NEXT: v_writelane_b32 v1, s63, 9 -; MUBUF-NEXT: v_writelane_b32 v1, s64, 10 -; MUBUF-NEXT: v_writelane_b32 v1, s65, 11 -; MUBUF-NEXT: v_writelane_b32 v1, s66, 12 -; MUBUF-NEXT: v_writelane_b32 v1, s67, 13 -; MUBUF-NEXT: v_writelane_b32 v1, s68, 14 -; MUBUF-NEXT: v_writelane_b32 v1, s69, 15 -; MUBUF-NEXT: v_writelane_b32 v1, s78, 16 -; MUBUF-NEXT: v_writelane_b32 v1, s79, 17 -; MUBUF-NEXT: v_writelane_b32 v1, s80, 18 -; MUBUF-NEXT: v_writelane_b32 v1, s81, 19 -; MUBUF-NEXT: v_writelane_b32 v1, s82, 20 -; MUBUF-NEXT: v_writelane_b32 v1, s83, 21 -; MUBUF-NEXT: v_writelane_b32 v1, s84, 22 -; MUBUF-NEXT: v_writelane_b32 v1, s85, 23 -; MUBUF-NEXT: v_writelane_b32 v1, s94, 24 -; MUBUF-NEXT: v_writelane_b32 v1, s95, 25 -; MUBUF-NEXT: v_writelane_b32 v1, s96, 26 -; MUBUF-NEXT: v_writelane_b32 v1, s97, 27 -; MUBUF-NEXT: v_writelane_b32 v1, s98, 28 -; MUBUF-NEXT: v_writelane_b32 v1, s99, 29 -; MUBUF-NEXT: v_writelane_b32 v1, s100, 30 +; MUBUF-NEXT: v_writelane_b32 v1, s48, 0 +; MUBUF-NEXT: v_writelane_b32 v1, s49, 1 +; MUBUF-NEXT: v_writelane_b32 v1, s50, 2 +; MUBUF-NEXT: v_writelane_b32 v1, s51, 3 +; MUBUF-NEXT: v_writelane_b32 v1, s52, 4 +; MUBUF-NEXT: v_writelane_b32 v1, s53, 5 +; MUBUF-NEXT: v_writelane_b32 v1, s54, 6 +; MUBUF-NEXT: v_writelane_b32 v1, s55, 7 +; MUBUF-NEXT: v_writelane_b32 v1, s64, 8 +; MUBUF-NEXT: v_writelane_b32 v1, s65, 9 +; MUBUF-NEXT: v_writelane_b32 v1, s66, 10 +; MUBUF-NEXT: v_writelane_b32 v1, s67, 11 +; MUBUF-NEXT: v_writelane_b32 v1, s68, 12 +; MUBUF-NEXT: v_writelane_b32 v1, s69, 13 +; MUBUF-NEXT: v_writelane_b32 v1, s70, 14 +; MUBUF-NEXT: v_writelane_b32 v1, s71, 15 +; MUBUF-NEXT: v_writelane_b32 v1, s80, 16 +; MUBUF-NEXT: v_writelane_b32 v1, s81, 17 +; MUBUF-NEXT: v_writelane_b32 v1, s82, 18 +; MUBUF-NEXT: v_writelane_b32 v1, s83, 19 +; MUBUF-NEXT: v_writelane_b32 v1, s84, 20 +; MUBUF-NEXT: v_writelane_b32 v1, s85, 21 +; MUBUF-NEXT: v_writelane_b32 v1, s86, 22 +; MUBUF-NEXT: v_writelane_b32 v1, s87, 23 +; MUBUF-NEXT: v_writelane_b32 v1, s96, 24 +; MUBUF-NEXT: v_writelane_b32 v1, s97, 25 +; MUBUF-NEXT: v_writelane_b32 v1, s98, 26 +; MUBUF-NEXT: v_writelane_b32 v1, s99, 27 +; MUBUF-NEXT: v_writelane_b32 v1, s100, 28 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; MUBUF-NEXT: v_writelane_b32 v1, s101, 31 +; MUBUF-NEXT: v_writelane_b32 v1, s101, 29 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber v41 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_writelane_b32 v1, s102, 32 +; MUBUF-NEXT: v_writelane_b32 v1, s102, 30 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_readlane_b32 s102, v1, 32 -; MUBUF-NEXT: v_readlane_b32 s101, v1, 31 -; MUBUF-NEXT: v_readlane_b32 s100, v1, 30 -; MUBUF-NEXT: v_readlane_b32 s99, v1, 29 -; MUBUF-NEXT: v_readlane_b32 s98, v1, 28 -; MUBUF-NEXT: v_readlane_b32 s97, v1, 27 -; MUBUF-NEXT: v_readlane_b32 s96, v1, 26 -; MUBUF-NEXT: v_readlane_b32 s95, v1, 25 -; MUBUF-NEXT: v_readlane_b32 s94, v1, 24 -; MUBUF-NEXT: v_readlane_b32 s85, v1, 23 -; MUBUF-NEXT: v_readlane_b32 s84, v1, 22 -; MUBUF-NEXT: v_readlane_b32 s83, v1, 21 -; MUBUF-NEXT: v_readlane_b32 s82, v1, 20 -; MUBUF-NEXT: v_readlane_b32 s81, v1, 19 -; MUBUF-NEXT: v_readlane_b32 s80, v1, 18 -; MUBUF-NEXT: v_readlane_b32 s79, v1, 17 -; MUBUF-NEXT: v_readlane_b32 s78, v1, 16 -; MUBUF-NEXT: v_readlane_b32 s69, v1, 15 -; MUBUF-NEXT: v_readlane_b32 s68, v1, 14 -; MUBUF-NEXT: v_readlane_b32 s67, v1, 13 -; MUBUF-NEXT: v_readlane_b32 s66, v1, 12 -; MUBUF-NEXT: v_readlane_b32 s65, v1, 11 -; MUBUF-NEXT: v_readlane_b32 s64, v1, 10 -; MUBUF-NEXT: v_readlane_b32 s63, v1, 9 -; MUBUF-NEXT: v_readlane_b32 s62, v1, 8 -; MUBUF-NEXT: v_readlane_b32 s53, v1, 7 -; MUBUF-NEXT: v_readlane_b32 s52, v1, 6 -; MUBUF-NEXT: v_readlane_b32 s51, v1, 5 -; MUBUF-NEXT: v_readlane_b32 s50, v1, 4 -; MUBUF-NEXT: v_readlane_b32 s49, v1, 3 -; MUBUF-NEXT: v_readlane_b32 s48, v1, 2 -; MUBUF-NEXT: v_readlane_b32 s47, v1, 1 -; MUBUF-NEXT: v_readlane_b32 s46, v1, 0 +; MUBUF-NEXT: v_readlane_b32 s102, v1, 30 +; MUBUF-NEXT: v_readlane_b32 s101, v1, 29 +; MUBUF-NEXT: v_readlane_b32 s100, v1, 28 +; MUBUF-NEXT: v_readlane_b32 s99, v1, 27 +; MUBUF-NEXT: v_readlane_b32 s98, v1, 26 +; MUBUF-NEXT: v_readlane_b32 s97, v1, 25 +; MUBUF-NEXT: v_readlane_b32 s96, v1, 24 +; MUBUF-NEXT: v_readlane_b32 s87, v1, 23 +; MUBUF-NEXT: v_readlane_b32 s86, v1, 22 +; MUBUF-NEXT: v_readlane_b32 s85, v1, 21 +; MUBUF-NEXT: v_readlane_b32 s84, v1, 20 +; MUBUF-NEXT: v_readlane_b32 s83, v1, 19 +; MUBUF-NEXT: v_readlane_b32 s82, v1, 18 +; MUBUF-NEXT: v_readlane_b32 s81, v1, 17 +; MUBUF-NEXT: v_readlane_b32 s80, v1, 16 +; MUBUF-NEXT: v_readlane_b32 s71, v1, 15 +; MUBUF-NEXT: v_readlane_b32 s70, v1, 14 +; MUBUF-NEXT: v_readlane_b32 s69, v1, 13 +; MUBUF-NEXT: v_readlane_b32 s68, v1, 12 +; MUBUF-NEXT: v_readlane_b32 s67, v1, 11 +; MUBUF-NEXT: v_readlane_b32 s66, v1, 10 +; MUBUF-NEXT: v_readlane_b32 s65, v1, 9 +; MUBUF-NEXT: v_readlane_b32 s64, v1, 8 +; MUBUF-NEXT: v_readlane_b32 s55, v1, 7 +; MUBUF-NEXT: v_readlane_b32 s54, v1, 6 +; MUBUF-NEXT: v_readlane_b32 s53, v1, 5 +; MUBUF-NEXT: v_readlane_b32 s52, v1, 4 +; MUBUF-NEXT: v_readlane_b32 s51, v1, 3 +; MUBUF-NEXT: v_readlane_b32 s50, v1, 2 +; MUBUF-NEXT: v_readlane_b32 s49, v1, 1 +; MUBUF-NEXT: v_readlane_b32 s48, v1, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload @@ -634,83 +626,79 @@ define void @last_lane_vgpr_for_fp_csr() #1 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:8 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: v_writelane_b32 v1, s46, 0 -; FLATSCR-NEXT: v_writelane_b32 v1, s47, 1 -; FLATSCR-NEXT: v_writelane_b32 v1, s48, 2 -; FLATSCR-NEXT: v_writelane_b32 v1, s49, 3 -; FLATSCR-NEXT: v_writelane_b32 v1, s50, 4 -; FLATSCR-NEXT: v_writelane_b32 v1, s51, 5 -; FLATSCR-NEXT: v_writelane_b32 v1, s52, 6 -; FLATSCR-NEXT: v_writelane_b32 v1, s53, 7 -; FLATSCR-NEXT: v_writelane_b32 v1, s62, 8 -; FLATSCR-NEXT: v_writelane_b32 v1, s63, 9 -; FLATSCR-NEXT: v_writelane_b32 v1, s64, 10 -; FLATSCR-NEXT: v_writelane_b32 v1, s65, 11 -; FLATSCR-NEXT: v_writelane_b32 v1, s66, 12 -; FLATSCR-NEXT: v_writelane_b32 v1, s67, 13 -; FLATSCR-NEXT: v_writelane_b32 v1, s68, 14 -; FLATSCR-NEXT: v_writelane_b32 v1, s69, 15 -; FLATSCR-NEXT: v_writelane_b32 v1, s78, 16 -; FLATSCR-NEXT: v_writelane_b32 v1, s79, 17 -; FLATSCR-NEXT: v_writelane_b32 v1, s80, 18 -; FLATSCR-NEXT: v_writelane_b32 v1, s81, 19 -; FLATSCR-NEXT: v_writelane_b32 v1, s82, 20 -; FLATSCR-NEXT: v_writelane_b32 v1, s83, 21 -; FLATSCR-NEXT: v_writelane_b32 v1, s84, 22 -; FLATSCR-NEXT: v_writelane_b32 v1, s85, 23 -; FLATSCR-NEXT: v_writelane_b32 v1, s94, 24 -; FLATSCR-NEXT: v_writelane_b32 v1, s95, 25 -; FLATSCR-NEXT: v_writelane_b32 v1, s96, 26 -; FLATSCR-NEXT: v_writelane_b32 v1, s97, 27 -; FLATSCR-NEXT: v_writelane_b32 v1, s98, 28 -; FLATSCR-NEXT: v_writelane_b32 v1, s99, 29 -; FLATSCR-NEXT: v_writelane_b32 v1, s100, 30 +; FLATSCR-NEXT: v_writelane_b32 v1, s48, 0 +; FLATSCR-NEXT: v_writelane_b32 v1, s49, 1 +; FLATSCR-NEXT: v_writelane_b32 v1, s50, 2 +; FLATSCR-NEXT: v_writelane_b32 v1, s51, 3 +; FLATSCR-NEXT: v_writelane_b32 v1, s52, 4 +; FLATSCR-NEXT: v_writelane_b32 v1, s53, 5 +; FLATSCR-NEXT: v_writelane_b32 v1, s54, 6 +; FLATSCR-NEXT: v_writelane_b32 v1, s55, 7 +; FLATSCR-NEXT: v_writelane_b32 v1, s64, 8 +; FLATSCR-NEXT: v_writelane_b32 v1, s65, 9 +; FLATSCR-NEXT: v_writelane_b32 v1, s66, 10 +; FLATSCR-NEXT: v_writelane_b32 v1, s67, 11 +; FLATSCR-NEXT: v_writelane_b32 v1, s68, 12 +; FLATSCR-NEXT: v_writelane_b32 v1, s69, 13 +; FLATSCR-NEXT: v_writelane_b32 v1, s70, 14 +; FLATSCR-NEXT: v_writelane_b32 v1, s71, 15 +; FLATSCR-NEXT: v_writelane_b32 v1, s80, 16 +; FLATSCR-NEXT: v_writelane_b32 v1, s81, 17 +; FLATSCR-NEXT: v_writelane_b32 v1, s82, 18 +; FLATSCR-NEXT: v_writelane_b32 v1, s83, 19 +; FLATSCR-NEXT: v_writelane_b32 v1, s84, 20 +; FLATSCR-NEXT: v_writelane_b32 v1, s85, 21 +; FLATSCR-NEXT: v_writelane_b32 v1, s86, 22 +; FLATSCR-NEXT: v_writelane_b32 v1, s87, 23 +; FLATSCR-NEXT: v_writelane_b32 v1, s96, 24 +; FLATSCR-NEXT: v_writelane_b32 v1, s97, 25 +; FLATSCR-NEXT: v_writelane_b32 v1, s98, 26 +; FLATSCR-NEXT: v_writelane_b32 v1, s99, 27 +; FLATSCR-NEXT: v_writelane_b32 v1, s100, 28 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill -; FLATSCR-NEXT: v_writelane_b32 v1, s101, 31 +; FLATSCR-NEXT: v_writelane_b32 v1, s101, 29 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber v41 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_writelane_b32 v1, s102, 32 +; FLATSCR-NEXT: v_writelane_b32 v1, s102, 30 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 -; FLATSCR-NEXT: v_readlane_b32 s102, v1, 32 -; FLATSCR-NEXT: v_readlane_b32 s101, v1, 31 -; FLATSCR-NEXT: v_readlane_b32 s100, v1, 30 -; FLATSCR-NEXT: v_readlane_b32 s99, v1, 29 -; FLATSCR-NEXT: v_readlane_b32 s98, v1, 28 -; FLATSCR-NEXT: v_readlane_b32 s97, v1, 27 -; FLATSCR-NEXT: v_readlane_b32 s96, v1, 26 -; FLATSCR-NEXT: v_readlane_b32 s95, v1, 25 -; FLATSCR-NEXT: v_readlane_b32 s94, v1, 24 -; FLATSCR-NEXT: v_readlane_b32 s85, v1, 23 -; FLATSCR-NEXT: v_readlane_b32 s84, v1, 22 -; FLATSCR-NEXT: v_readlane_b32 s83, v1, 21 -; FLATSCR-NEXT: v_readlane_b32 s82, v1, 20 -; FLATSCR-NEXT: v_readlane_b32 s81, v1, 19 -; FLATSCR-NEXT: v_readlane_b32 s80, v1, 18 -; FLATSCR-NEXT: v_readlane_b32 s79, v1, 17 -; FLATSCR-NEXT: v_readlane_b32 s78, v1, 16 -; FLATSCR-NEXT: v_readlane_b32 s69, v1, 15 -; FLATSCR-NEXT: v_readlane_b32 s68, v1, 14 -; FLATSCR-NEXT: v_readlane_b32 s67, v1, 13 -; FLATSCR-NEXT: v_readlane_b32 s66, v1, 12 -; FLATSCR-NEXT: v_readlane_b32 s65, v1, 11 -; FLATSCR-NEXT: v_readlane_b32 s64, v1, 10 -; FLATSCR-NEXT: v_readlane_b32 s63, v1, 9 -; FLATSCR-NEXT: v_readlane_b32 s62, v1, 8 -; FLATSCR-NEXT: v_readlane_b32 s53, v1, 7 -; FLATSCR-NEXT: v_readlane_b32 s52, v1, 6 -; FLATSCR-NEXT: v_readlane_b32 s51, v1, 5 -; FLATSCR-NEXT: v_readlane_b32 s50, v1, 4 -; FLATSCR-NEXT: v_readlane_b32 s49, v1, 3 -; FLATSCR-NEXT: v_readlane_b32 s48, v1, 2 -; FLATSCR-NEXT: v_readlane_b32 s47, v1, 1 -; FLATSCR-NEXT: v_readlane_b32 s46, v1, 0 +; FLATSCR-NEXT: v_readlane_b32 s102, v1, 30 +; FLATSCR-NEXT: v_readlane_b32 s101, v1, 29 +; FLATSCR-NEXT: v_readlane_b32 s100, v1, 28 +; FLATSCR-NEXT: v_readlane_b32 s99, v1, 27 +; FLATSCR-NEXT: v_readlane_b32 s98, v1, 26 +; FLATSCR-NEXT: v_readlane_b32 s97, v1, 25 +; FLATSCR-NEXT: v_readlane_b32 s96, v1, 24 +; FLATSCR-NEXT: v_readlane_b32 s87, v1, 23 +; FLATSCR-NEXT: v_readlane_b32 s86, v1, 22 +; FLATSCR-NEXT: v_readlane_b32 s85, v1, 21 +; FLATSCR-NEXT: v_readlane_b32 s84, v1, 20 +; FLATSCR-NEXT: v_readlane_b32 s83, v1, 19 +; FLATSCR-NEXT: v_readlane_b32 s82, v1, 18 +; FLATSCR-NEXT: v_readlane_b32 s81, v1, 17 +; FLATSCR-NEXT: v_readlane_b32 s80, v1, 16 +; FLATSCR-NEXT: v_readlane_b32 s71, v1, 15 +; FLATSCR-NEXT: v_readlane_b32 s70, v1, 14 +; FLATSCR-NEXT: v_readlane_b32 s69, v1, 13 +; FLATSCR-NEXT: v_readlane_b32 s68, v1, 12 +; FLATSCR-NEXT: v_readlane_b32 s67, v1, 11 +; FLATSCR-NEXT: v_readlane_b32 s66, v1, 10 +; FLATSCR-NEXT: v_readlane_b32 s65, v1, 9 +; FLATSCR-NEXT: v_readlane_b32 s64, v1, 8 +; FLATSCR-NEXT: v_readlane_b32 s55, v1, 7 +; FLATSCR-NEXT: v_readlane_b32 s54, v1, 6 +; FLATSCR-NEXT: v_readlane_b32 s53, v1, 5 +; FLATSCR-NEXT: v_readlane_b32 s52, v1, 4 +; FLATSCR-NEXT: v_readlane_b32 s51, v1, 3 +; FLATSCR-NEXT: v_readlane_b32 s50, v1, 2 +; FLATSCR-NEXT: v_readlane_b32 s49, v1, 1 +; FLATSCR-NEXT: v_readlane_b32 s48, v1, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:8 ; 4-byte Folded Reload @@ -743,83 +731,81 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; MUBUF-NEXT: s_mov_b64 exec, s[6:7] -; MUBUF-NEXT: v_writelane_b32 v1, s46, 0 -; MUBUF-NEXT: v_writelane_b32 v1, s47, 1 -; MUBUF-NEXT: v_writelane_b32 v1, s48, 2 -; MUBUF-NEXT: v_writelane_b32 v1, s49, 3 -; MUBUF-NEXT: v_writelane_b32 v1, s50, 4 -; MUBUF-NEXT: v_writelane_b32 v1, s51, 5 -; MUBUF-NEXT: v_writelane_b32 v1, s52, 6 -; MUBUF-NEXT: v_writelane_b32 v1, s53, 7 -; MUBUF-NEXT: v_writelane_b32 v1, s62, 8 -; MUBUF-NEXT: v_writelane_b32 v1, s63, 9 -; MUBUF-NEXT: v_writelane_b32 v1, s64, 10 -; MUBUF-NEXT: v_writelane_b32 v1, s65, 11 -; MUBUF-NEXT: v_writelane_b32 v1, s66, 12 -; MUBUF-NEXT: v_writelane_b32 v1, s67, 13 -; MUBUF-NEXT: v_writelane_b32 v1, s68, 14 -; MUBUF-NEXT: v_writelane_b32 v1, s69, 15 -; MUBUF-NEXT: v_writelane_b32 v1, s78, 16 -; MUBUF-NEXT: v_writelane_b32 v1, s79, 17 -; MUBUF-NEXT: v_writelane_b32 v1, s80, 18 -; MUBUF-NEXT: v_writelane_b32 v1, s81, 19 -; MUBUF-NEXT: v_writelane_b32 v1, s82, 20 -; MUBUF-NEXT: v_writelane_b32 v1, s83, 21 -; MUBUF-NEXT: v_writelane_b32 v1, s84, 22 -; MUBUF-NEXT: v_writelane_b32 v1, s85, 23 -; MUBUF-NEXT: v_writelane_b32 v1, s94, 24 -; MUBUF-NEXT: v_writelane_b32 v1, s95, 25 -; MUBUF-NEXT: v_writelane_b32 v1, s96, 26 -; MUBUF-NEXT: v_writelane_b32 v1, s97, 27 -; MUBUF-NEXT: v_writelane_b32 v1, s98, 28 -; MUBUF-NEXT: v_writelane_b32 v1, s99, 29 -; MUBUF-NEXT: v_writelane_b32 v1, s100, 30 +; MUBUF-NEXT: v_writelane_b32 v1, s39, 0 +; MUBUF-NEXT: v_writelane_b32 v1, s48, 1 +; MUBUF-NEXT: v_writelane_b32 v1, s49, 2 +; MUBUF-NEXT: v_writelane_b32 v1, s50, 3 +; MUBUF-NEXT: v_writelane_b32 v1, s51, 4 +; MUBUF-NEXT: v_writelane_b32 v1, s52, 5 +; MUBUF-NEXT: v_writelane_b32 v1, s53, 6 +; MUBUF-NEXT: v_writelane_b32 v1, s54, 7 +; MUBUF-NEXT: v_writelane_b32 v1, s55, 8 +; MUBUF-NEXT: v_writelane_b32 v1, s64, 9 +; MUBUF-NEXT: v_writelane_b32 v1, s65, 10 +; MUBUF-NEXT: v_writelane_b32 v1, s66, 11 +; MUBUF-NEXT: v_writelane_b32 v1, s67, 12 +; MUBUF-NEXT: v_writelane_b32 v1, s68, 13 +; MUBUF-NEXT: v_writelane_b32 v1, s69, 14 +; MUBUF-NEXT: v_writelane_b32 v1, s70, 15 +; MUBUF-NEXT: v_writelane_b32 v1, s71, 16 +; MUBUF-NEXT: v_writelane_b32 v1, s80, 17 +; MUBUF-NEXT: v_writelane_b32 v1, s81, 18 +; MUBUF-NEXT: v_writelane_b32 v1, s82, 19 +; MUBUF-NEXT: v_writelane_b32 v1, s83, 20 +; MUBUF-NEXT: v_writelane_b32 v1, s84, 21 +; MUBUF-NEXT: v_writelane_b32 v1, s85, 22 +; MUBUF-NEXT: v_writelane_b32 v1, s86, 23 +; MUBUF-NEXT: v_writelane_b32 v1, s87, 24 +; MUBUF-NEXT: v_writelane_b32 v1, s96, 25 +; MUBUF-NEXT: v_writelane_b32 v1, s97, 26 +; MUBUF-NEXT: v_writelane_b32 v1, s98, 27 +; MUBUF-NEXT: v_writelane_b32 v1, s99, 28 +; MUBUF-NEXT: v_writelane_b32 v1, s100, 29 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; MUBUF-NEXT: v_writelane_b32 v1, s101, 31 +; MUBUF-NEXT: v_writelane_b32 v1, s101, 30 ; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber v41 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_writelane_b32 v1, s102, 32 +; MUBUF-NEXT: v_writelane_b32 v1, s102, 31 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; MUBUF-NEXT: s_addk_i32 s32, 0x400 -; MUBUF-NEXT: v_readlane_b32 s102, v1, 32 -; MUBUF-NEXT: v_readlane_b32 s101, v1, 31 -; MUBUF-NEXT: v_readlane_b32 s100, v1, 30 -; MUBUF-NEXT: v_readlane_b32 s99, v1, 29 -; MUBUF-NEXT: v_readlane_b32 s98, v1, 28 -; MUBUF-NEXT: v_readlane_b32 s97, v1, 27 -; MUBUF-NEXT: v_readlane_b32 s96, v1, 26 -; MUBUF-NEXT: v_readlane_b32 s95, v1, 25 -; MUBUF-NEXT: v_readlane_b32 s94, v1, 24 -; MUBUF-NEXT: v_readlane_b32 s85, v1, 23 -; MUBUF-NEXT: v_readlane_b32 s84, v1, 22 -; MUBUF-NEXT: v_readlane_b32 s83, v1, 21 -; MUBUF-NEXT: v_readlane_b32 s82, v1, 20 -; MUBUF-NEXT: v_readlane_b32 s81, v1, 19 -; MUBUF-NEXT: v_readlane_b32 s80, v1, 18 -; MUBUF-NEXT: v_readlane_b32 s79, v1, 17 -; MUBUF-NEXT: v_readlane_b32 s78, v1, 16 -; MUBUF-NEXT: v_readlane_b32 s69, v1, 15 -; MUBUF-NEXT: v_readlane_b32 s68, v1, 14 -; MUBUF-NEXT: v_readlane_b32 s67, v1, 13 -; MUBUF-NEXT: v_readlane_b32 s66, v1, 12 -; MUBUF-NEXT: v_readlane_b32 s65, v1, 11 -; MUBUF-NEXT: v_readlane_b32 s64, v1, 10 -; MUBUF-NEXT: v_readlane_b32 s63, v1, 9 -; MUBUF-NEXT: v_readlane_b32 s62, v1, 8 -; MUBUF-NEXT: v_readlane_b32 s53, v1, 7 -; MUBUF-NEXT: v_readlane_b32 s52, v1, 6 -; MUBUF-NEXT: v_readlane_b32 s51, v1, 5 -; MUBUF-NEXT: v_readlane_b32 s50, v1, 4 -; MUBUF-NEXT: v_readlane_b32 s49, v1, 3 -; MUBUF-NEXT: v_readlane_b32 s48, v1, 2 -; MUBUF-NEXT: v_readlane_b32 s47, v1, 1 -; MUBUF-NEXT: v_readlane_b32 s46, v1, 0 +; MUBUF-NEXT: v_readlane_b32 s102, v1, 31 +; MUBUF-NEXT: v_readlane_b32 s101, v1, 30 +; MUBUF-NEXT: v_readlane_b32 s100, v1, 29 +; MUBUF-NEXT: v_readlane_b32 s99, v1, 28 +; MUBUF-NEXT: v_readlane_b32 s98, v1, 27 +; MUBUF-NEXT: v_readlane_b32 s97, v1, 26 +; MUBUF-NEXT: v_readlane_b32 s96, v1, 25 +; MUBUF-NEXT: v_readlane_b32 s87, v1, 24 +; MUBUF-NEXT: v_readlane_b32 s86, v1, 23 +; MUBUF-NEXT: v_readlane_b32 s85, v1, 22 +; MUBUF-NEXT: v_readlane_b32 s84, v1, 21 +; MUBUF-NEXT: v_readlane_b32 s83, v1, 20 +; MUBUF-NEXT: v_readlane_b32 s82, v1, 19 +; MUBUF-NEXT: v_readlane_b32 s81, v1, 18 +; MUBUF-NEXT: v_readlane_b32 s80, v1, 17 +; MUBUF-NEXT: v_readlane_b32 s71, v1, 16 +; MUBUF-NEXT: v_readlane_b32 s70, v1, 15 +; MUBUF-NEXT: v_readlane_b32 s69, v1, 14 +; MUBUF-NEXT: v_readlane_b32 s68, v1, 13 +; MUBUF-NEXT: v_readlane_b32 s67, v1, 12 +; MUBUF-NEXT: v_readlane_b32 s66, v1, 11 +; MUBUF-NEXT: v_readlane_b32 s65, v1, 10 +; MUBUF-NEXT: v_readlane_b32 s64, v1, 9 +; MUBUF-NEXT: v_readlane_b32 s55, v1, 8 +; MUBUF-NEXT: v_readlane_b32 s54, v1, 7 +; MUBUF-NEXT: v_readlane_b32 s53, v1, 6 +; MUBUF-NEXT: v_readlane_b32 s52, v1, 5 +; MUBUF-NEXT: v_readlane_b32 s51, v1, 4 +; MUBUF-NEXT: v_readlane_b32 s50, v1, 3 +; MUBUF-NEXT: v_readlane_b32 s49, v1, 2 +; MUBUF-NEXT: v_readlane_b32 s48, v1, 1 +; MUBUF-NEXT: v_readlane_b32 s39, v1, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 ; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload @@ -836,83 +822,81 @@ define void @no_new_vgpr_for_fp_csr() #1 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:8 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: v_writelane_b32 v1, s46, 0 -; FLATSCR-NEXT: v_writelane_b32 v1, s47, 1 -; FLATSCR-NEXT: v_writelane_b32 v1, s48, 2 -; FLATSCR-NEXT: v_writelane_b32 v1, s49, 3 -; FLATSCR-NEXT: v_writelane_b32 v1, s50, 4 -; FLATSCR-NEXT: v_writelane_b32 v1, s51, 5 -; FLATSCR-NEXT: v_writelane_b32 v1, s52, 6 -; FLATSCR-NEXT: v_writelane_b32 v1, s53, 7 -; FLATSCR-NEXT: v_writelane_b32 v1, s62, 8 -; FLATSCR-NEXT: v_writelane_b32 v1, s63, 9 -; FLATSCR-NEXT: v_writelane_b32 v1, s64, 10 -; FLATSCR-NEXT: v_writelane_b32 v1, s65, 11 -; FLATSCR-NEXT: v_writelane_b32 v1, s66, 12 -; FLATSCR-NEXT: v_writelane_b32 v1, s67, 13 -; FLATSCR-NEXT: v_writelane_b32 v1, s68, 14 -; FLATSCR-NEXT: v_writelane_b32 v1, s69, 15 -; FLATSCR-NEXT: v_writelane_b32 v1, s78, 16 -; FLATSCR-NEXT: v_writelane_b32 v1, s79, 17 -; FLATSCR-NEXT: v_writelane_b32 v1, s80, 18 -; FLATSCR-NEXT: v_writelane_b32 v1, s81, 19 -; FLATSCR-NEXT: v_writelane_b32 v1, s82, 20 -; FLATSCR-NEXT: v_writelane_b32 v1, s83, 21 -; FLATSCR-NEXT: v_writelane_b32 v1, s84, 22 -; FLATSCR-NEXT: v_writelane_b32 v1, s85, 23 -; FLATSCR-NEXT: v_writelane_b32 v1, s94, 24 -; FLATSCR-NEXT: v_writelane_b32 v1, s95, 25 -; FLATSCR-NEXT: v_writelane_b32 v1, s96, 26 -; FLATSCR-NEXT: v_writelane_b32 v1, s97, 27 -; FLATSCR-NEXT: v_writelane_b32 v1, s98, 28 -; FLATSCR-NEXT: v_writelane_b32 v1, s99, 29 -; FLATSCR-NEXT: v_writelane_b32 v1, s100, 30 +; FLATSCR-NEXT: v_writelane_b32 v1, s39, 0 +; FLATSCR-NEXT: v_writelane_b32 v1, s48, 1 +; FLATSCR-NEXT: v_writelane_b32 v1, s49, 2 +; FLATSCR-NEXT: v_writelane_b32 v1, s50, 3 +; FLATSCR-NEXT: v_writelane_b32 v1, s51, 4 +; FLATSCR-NEXT: v_writelane_b32 v1, s52, 5 +; FLATSCR-NEXT: v_writelane_b32 v1, s53, 6 +; FLATSCR-NEXT: v_writelane_b32 v1, s54, 7 +; FLATSCR-NEXT: v_writelane_b32 v1, s55, 8 +; FLATSCR-NEXT: v_writelane_b32 v1, s64, 9 +; FLATSCR-NEXT: v_writelane_b32 v1, s65, 10 +; FLATSCR-NEXT: v_writelane_b32 v1, s66, 11 +; FLATSCR-NEXT: v_writelane_b32 v1, s67, 12 +; FLATSCR-NEXT: v_writelane_b32 v1, s68, 13 +; FLATSCR-NEXT: v_writelane_b32 v1, s69, 14 +; FLATSCR-NEXT: v_writelane_b32 v1, s70, 15 +; FLATSCR-NEXT: v_writelane_b32 v1, s71, 16 +; FLATSCR-NEXT: v_writelane_b32 v1, s80, 17 +; FLATSCR-NEXT: v_writelane_b32 v1, s81, 18 +; FLATSCR-NEXT: v_writelane_b32 v1, s82, 19 +; FLATSCR-NEXT: v_writelane_b32 v1, s83, 20 +; FLATSCR-NEXT: v_writelane_b32 v1, s84, 21 +; FLATSCR-NEXT: v_writelane_b32 v1, s85, 22 +; FLATSCR-NEXT: v_writelane_b32 v1, s86, 23 +; FLATSCR-NEXT: v_writelane_b32 v1, s87, 24 +; FLATSCR-NEXT: v_writelane_b32 v1, s96, 25 +; FLATSCR-NEXT: v_writelane_b32 v1, s97, 26 +; FLATSCR-NEXT: v_writelane_b32 v1, s98, 27 +; FLATSCR-NEXT: v_writelane_b32 v1, s99, 28 +; FLATSCR-NEXT: v_writelane_b32 v1, s100, 29 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill -; FLATSCR-NEXT: v_writelane_b32 v1, s101, 31 +; FLATSCR-NEXT: v_writelane_b32 v1, s101, 30 ; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber v41 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_writelane_b32 v1, s102, 32 +; FLATSCR-NEXT: v_writelane_b32 v1, s102, 31 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_add_i32 s32, s32, 16 -; FLATSCR-NEXT: v_readlane_b32 s102, v1, 32 -; FLATSCR-NEXT: v_readlane_b32 s101, v1, 31 -; FLATSCR-NEXT: v_readlane_b32 s100, v1, 30 -; FLATSCR-NEXT: v_readlane_b32 s99, v1, 29 -; FLATSCR-NEXT: v_readlane_b32 s98, v1, 28 -; FLATSCR-NEXT: v_readlane_b32 s97, v1, 27 -; FLATSCR-NEXT: v_readlane_b32 s96, v1, 26 -; FLATSCR-NEXT: v_readlane_b32 s95, v1, 25 -; FLATSCR-NEXT: v_readlane_b32 s94, v1, 24 -; FLATSCR-NEXT: v_readlane_b32 s85, v1, 23 -; FLATSCR-NEXT: v_readlane_b32 s84, v1, 22 -; FLATSCR-NEXT: v_readlane_b32 s83, v1, 21 -; FLATSCR-NEXT: v_readlane_b32 s82, v1, 20 -; FLATSCR-NEXT: v_readlane_b32 s81, v1, 19 -; FLATSCR-NEXT: v_readlane_b32 s80, v1, 18 -; FLATSCR-NEXT: v_readlane_b32 s79, v1, 17 -; FLATSCR-NEXT: v_readlane_b32 s78, v1, 16 -; FLATSCR-NEXT: v_readlane_b32 s69, v1, 15 -; FLATSCR-NEXT: v_readlane_b32 s68, v1, 14 -; FLATSCR-NEXT: v_readlane_b32 s67, v1, 13 -; FLATSCR-NEXT: v_readlane_b32 s66, v1, 12 -; FLATSCR-NEXT: v_readlane_b32 s65, v1, 11 -; FLATSCR-NEXT: v_readlane_b32 s64, v1, 10 -; FLATSCR-NEXT: v_readlane_b32 s63, v1, 9 -; FLATSCR-NEXT: v_readlane_b32 s62, v1, 8 -; FLATSCR-NEXT: v_readlane_b32 s53, v1, 7 -; FLATSCR-NEXT: v_readlane_b32 s52, v1, 6 -; FLATSCR-NEXT: v_readlane_b32 s51, v1, 5 -; FLATSCR-NEXT: v_readlane_b32 s50, v1, 4 -; FLATSCR-NEXT: v_readlane_b32 s49, v1, 3 -; FLATSCR-NEXT: v_readlane_b32 s48, v1, 2 -; FLATSCR-NEXT: v_readlane_b32 s47, v1, 1 -; FLATSCR-NEXT: v_readlane_b32 s46, v1, 0 +; FLATSCR-NEXT: v_readlane_b32 s102, v1, 31 +; FLATSCR-NEXT: v_readlane_b32 s101, v1, 30 +; FLATSCR-NEXT: v_readlane_b32 s100, v1, 29 +; FLATSCR-NEXT: v_readlane_b32 s99, v1, 28 +; FLATSCR-NEXT: v_readlane_b32 s98, v1, 27 +; FLATSCR-NEXT: v_readlane_b32 s97, v1, 26 +; FLATSCR-NEXT: v_readlane_b32 s96, v1, 25 +; FLATSCR-NEXT: v_readlane_b32 s87, v1, 24 +; FLATSCR-NEXT: v_readlane_b32 s86, v1, 23 +; FLATSCR-NEXT: v_readlane_b32 s85, v1, 22 +; FLATSCR-NEXT: v_readlane_b32 s84, v1, 21 +; FLATSCR-NEXT: v_readlane_b32 s83, v1, 20 +; FLATSCR-NEXT: v_readlane_b32 s82, v1, 19 +; FLATSCR-NEXT: v_readlane_b32 s81, v1, 18 +; FLATSCR-NEXT: v_readlane_b32 s80, v1, 17 +; FLATSCR-NEXT: v_readlane_b32 s71, v1, 16 +; FLATSCR-NEXT: v_readlane_b32 s70, v1, 15 +; FLATSCR-NEXT: v_readlane_b32 s69, v1, 14 +; FLATSCR-NEXT: v_readlane_b32 s68, v1, 13 +; FLATSCR-NEXT: v_readlane_b32 s67, v1, 12 +; FLATSCR-NEXT: v_readlane_b32 s66, v1, 11 +; FLATSCR-NEXT: v_readlane_b32 s65, v1, 10 +; FLATSCR-NEXT: v_readlane_b32 s64, v1, 9 +; FLATSCR-NEXT: v_readlane_b32 s55, v1, 8 +; FLATSCR-NEXT: v_readlane_b32 s54, v1, 7 +; FLATSCR-NEXT: v_readlane_b32 s53, v1, 6 +; FLATSCR-NEXT: v_readlane_b32 s52, v1, 5 +; FLATSCR-NEXT: v_readlane_b32 s51, v1, 4 +; FLATSCR-NEXT: v_readlane_b32 s50, v1, 3 +; FLATSCR-NEXT: v_readlane_b32 s49, v1, 2 +; FLATSCR-NEXT: v_readlane_b32 s48, v1, 1 +; FLATSCR-NEXT: v_readlane_b32 s39, v1, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:8 ; 4-byte Folded Reload @@ -980,7 +964,7 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; MUBUF-LABEL: no_unused_non_csr_sgpr_for_fp: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 s38, s33 +; MUBUF-NEXT: s_mov_b32 s40, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 ; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill @@ -999,14 +983,14 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: s_mov_b32 s33, s38 +; MUBUF-NEXT: s_mov_b32 s33, s40 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; ; FLATSCR-LABEL: no_unused_non_csr_sgpr_for_fp: ; FLATSCR: ; %bb.0: ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: s_mov_b32 s38, s33 +; FLATSCR-NEXT: s_mov_b32 s40, s33 ; FLATSCR-NEXT: s_mov_b32 s33, s32 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_store_dword off, v1, s33 offset:4 ; 4-byte Folded Spill @@ -1025,7 +1009,7 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v1, off, s33 offset:4 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: s_mov_b32 s33, s38 +; FLATSCR-NEXT: s_mov_b32 s33, s40 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, addrspace(5) @@ -1046,7 +1030,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; MUBUF-LABEL: no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 s38, s33 +; MUBUF-NEXT: s_mov_b32 s40, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 ; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill @@ -1068,14 +1052,14 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: s_mov_b32 s33, s38 +; MUBUF-NEXT: s_mov_b32 s33, s40 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; ; FLATSCR-LABEL: no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr: ; FLATSCR: ; %bb.0: ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: s_mov_b32 s38, s33 +; FLATSCR-NEXT: s_mov_b32 s40, s33 ; FLATSCR-NEXT: s_mov_b32 s33, s32 ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 offset:4 ; 4-byte Folded Spill @@ -1097,7 +1081,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: scratch_load_dword v40, off, s33 offset:4 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: s_mov_b32 s33, s38 +; FLATSCR-NEXT: s_mov_b32 s33, s40 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, addrspace(5) @@ -1125,7 +1109,7 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) ; MUBUF-LABEL: scratch_reg_needed_mubuf_offset: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 s38, s33 +; MUBUF-NEXT: s_mov_b32 s40, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 ; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 ; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100 @@ -1151,14 +1135,14 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) ; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100 ; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s6 ; 4-byte Folded Reload ; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: s_mov_b32 s33, s38 +; MUBUF-NEXT: s_mov_b32 s33, s40 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; ; FLATSCR-LABEL: scratch_reg_needed_mubuf_offset: ; FLATSCR: ; %bb.0: ; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: s_mov_b32 s38, s33 +; FLATSCR-NEXT: s_mov_b32 s40, s33 ; FLATSCR-NEXT: s_mov_b32 s33, s32 ; FLATSCR-NEXT: s_or_saveexec_b64 s[0:1], -1 ; FLATSCR-NEXT: s_add_i32 s2, s33, 0x1004 @@ -1184,7 +1168,7 @@ define void @scratch_reg_needed_mubuf_offset(ptr addrspace(5) byval([4096 x i8]) ; FLATSCR-NEXT: s_add_i32 s2, s33, 0x1004 ; FLATSCR-NEXT: scratch_load_dword v40, off, s2 ; 4-byte Folded Reload ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] -; FLATSCR-NEXT: s_mov_b32 s33, s38 +; FLATSCR-NEXT: s_mov_b32 s33, s40 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, addrspace(5) @@ -1284,7 +1268,7 @@ define void @callee_need_to_spill_fp_to_memory() #3 { ; MUBUF-LABEL: callee_need_to_spill_fp_to_memory: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 s38, s33 +; MUBUF-NEXT: s_mov_b32 s40, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved SGPRs @@ -1292,7 +1276,7 @@ define void @callee_need_to_spill_fp_to_memory() #3 { ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber all VGPRs ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: s_mov_b32 s33, s38 +; MUBUF-NEXT: s_mov_b32 s33, s40 ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; ; FLATSCR-LABEL: callee_need_to_spill_fp_to_memory: @@ -1329,89 +1313,89 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; MUBUF-LABEL: callee_need_to_spill_fp_to_memory_full_reserved_vgpr: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 s38, s33 +; MUBUF-NEXT: s_mov_b32 s4, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 -; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s33 ; 4-byte Folded Spill -; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: v_writelane_b32 v39, s46, 0 -; MUBUF-NEXT: v_writelane_b32 v39, s47, 1 -; MUBUF-NEXT: v_writelane_b32 v39, s48, 2 -; MUBUF-NEXT: v_writelane_b32 v39, s49, 3 -; MUBUF-NEXT: v_writelane_b32 v39, s50, 4 -; MUBUF-NEXT: v_writelane_b32 v39, s51, 5 -; MUBUF-NEXT: v_writelane_b32 v39, s52, 6 -; MUBUF-NEXT: v_writelane_b32 v39, s53, 7 -; MUBUF-NEXT: v_writelane_b32 v39, s62, 8 -; MUBUF-NEXT: v_writelane_b32 v39, s63, 9 -; MUBUF-NEXT: v_writelane_b32 v39, s64, 10 -; MUBUF-NEXT: v_writelane_b32 v39, s65, 11 -; MUBUF-NEXT: v_writelane_b32 v39, s66, 12 -; MUBUF-NEXT: v_writelane_b32 v39, s67, 13 -; MUBUF-NEXT: v_writelane_b32 v39, s68, 14 -; MUBUF-NEXT: v_writelane_b32 v39, s69, 15 -; MUBUF-NEXT: v_writelane_b32 v39, s78, 16 -; MUBUF-NEXT: v_writelane_b32 v39, s79, 17 -; MUBUF-NEXT: v_writelane_b32 v39, s80, 18 -; MUBUF-NEXT: v_writelane_b32 v39, s81, 19 -; MUBUF-NEXT: v_writelane_b32 v39, s82, 20 -; MUBUF-NEXT: v_writelane_b32 v39, s83, 21 -; MUBUF-NEXT: v_writelane_b32 v39, s84, 22 -; MUBUF-NEXT: v_writelane_b32 v39, s85, 23 -; MUBUF-NEXT: v_writelane_b32 v39, s94, 24 -; MUBUF-NEXT: v_writelane_b32 v39, s95, 25 -; MUBUF-NEXT: v_writelane_b32 v39, s96, 26 -; MUBUF-NEXT: v_writelane_b32 v39, s97, 27 -; MUBUF-NEXT: v_writelane_b32 v39, s98, 28 -; MUBUF-NEXT: v_writelane_b32 v39, s99, 29 -; MUBUF-NEXT: v_writelane_b32 v39, s100, 30 -; MUBUF-NEXT: v_writelane_b32 v39, s101, 31 +; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: v_writelane_b32 v39, s4, 32 +; MUBUF-NEXT: v_writelane_b32 v39, s39, 0 +; MUBUF-NEXT: v_writelane_b32 v39, s48, 1 +; MUBUF-NEXT: v_writelane_b32 v39, s49, 2 +; MUBUF-NEXT: v_writelane_b32 v39, s50, 3 +; MUBUF-NEXT: v_writelane_b32 v39, s51, 4 +; MUBUF-NEXT: v_writelane_b32 v39, s52, 5 +; MUBUF-NEXT: v_writelane_b32 v39, s53, 6 +; MUBUF-NEXT: v_writelane_b32 v39, s54, 7 +; MUBUF-NEXT: v_writelane_b32 v39, s55, 8 +; MUBUF-NEXT: v_writelane_b32 v39, s64, 9 +; MUBUF-NEXT: v_writelane_b32 v39, s65, 10 +; MUBUF-NEXT: v_writelane_b32 v39, s66, 11 +; MUBUF-NEXT: v_writelane_b32 v39, s67, 12 +; MUBUF-NEXT: v_writelane_b32 v39, s68, 13 +; MUBUF-NEXT: v_writelane_b32 v39, s69, 14 +; MUBUF-NEXT: v_writelane_b32 v39, s70, 15 +; MUBUF-NEXT: v_writelane_b32 v39, s71, 16 +; MUBUF-NEXT: v_writelane_b32 v39, s80, 17 +; MUBUF-NEXT: v_writelane_b32 v39, s81, 18 +; MUBUF-NEXT: v_writelane_b32 v39, s82, 19 +; MUBUF-NEXT: v_writelane_b32 v39, s83, 20 +; MUBUF-NEXT: v_writelane_b32 v39, s84, 21 +; MUBUF-NEXT: v_writelane_b32 v39, s85, 22 +; MUBUF-NEXT: v_writelane_b32 v39, s86, 23 +; MUBUF-NEXT: v_writelane_b32 v39, s87, 24 +; MUBUF-NEXT: v_writelane_b32 v39, s96, 25 +; MUBUF-NEXT: v_writelane_b32 v39, s97, 26 +; MUBUF-NEXT: v_writelane_b32 v39, s98, 27 +; MUBUF-NEXT: v_writelane_b32 v39, s99, 28 +; MUBUF-NEXT: v_writelane_b32 v39, s100, 29 +; MUBUF-NEXT: v_writelane_b32 v39, s101, 30 ; MUBUF-NEXT: s_addk_i32 s32, 0x200 -; MUBUF-NEXT: v_writelane_b32 v39, s102, 32 +; MUBUF-NEXT: v_writelane_b32 v39, s102, 31 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber all VGPRs except CSR v40 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s102, v39, 32 -; MUBUF-NEXT: v_readlane_b32 s101, v39, 31 -; MUBUF-NEXT: v_readlane_b32 s100, v39, 30 -; MUBUF-NEXT: v_readlane_b32 s99, v39, 29 -; MUBUF-NEXT: v_readlane_b32 s98, v39, 28 -; MUBUF-NEXT: v_readlane_b32 s97, v39, 27 -; MUBUF-NEXT: v_readlane_b32 s96, v39, 26 -; MUBUF-NEXT: v_readlane_b32 s95, v39, 25 -; MUBUF-NEXT: v_readlane_b32 s94, v39, 24 -; MUBUF-NEXT: v_readlane_b32 s85, v39, 23 -; MUBUF-NEXT: v_readlane_b32 s84, v39, 22 -; MUBUF-NEXT: v_readlane_b32 s83, v39, 21 -; MUBUF-NEXT: v_readlane_b32 s82, v39, 20 -; MUBUF-NEXT: v_readlane_b32 s81, v39, 19 -; MUBUF-NEXT: v_readlane_b32 s80, v39, 18 -; MUBUF-NEXT: v_readlane_b32 s79, v39, 17 -; MUBUF-NEXT: v_readlane_b32 s78, v39, 16 -; MUBUF-NEXT: v_readlane_b32 s69, v39, 15 -; MUBUF-NEXT: v_readlane_b32 s68, v39, 14 -; MUBUF-NEXT: v_readlane_b32 s67, v39, 13 -; MUBUF-NEXT: v_readlane_b32 s66, v39, 12 -; MUBUF-NEXT: v_readlane_b32 s65, v39, 11 -; MUBUF-NEXT: v_readlane_b32 s64, v39, 10 -; MUBUF-NEXT: v_readlane_b32 s63, v39, 9 -; MUBUF-NEXT: v_readlane_b32 s62, v39, 8 -; MUBUF-NEXT: v_readlane_b32 s53, v39, 7 -; MUBUF-NEXT: v_readlane_b32 s52, v39, 6 -; MUBUF-NEXT: v_readlane_b32 s51, v39, 5 -; MUBUF-NEXT: v_readlane_b32 s50, v39, 4 -; MUBUF-NEXT: v_readlane_b32 s49, v39, 3 -; MUBUF-NEXT: v_readlane_b32 s48, v39, 2 -; MUBUF-NEXT: v_readlane_b32 s47, v39, 1 -; MUBUF-NEXT: v_readlane_b32 s46, v39, 0 +; MUBUF-NEXT: v_readlane_b32 s102, v39, 31 +; MUBUF-NEXT: v_readlane_b32 s101, v39, 30 +; MUBUF-NEXT: v_readlane_b32 s100, v39, 29 +; MUBUF-NEXT: v_readlane_b32 s99, v39, 28 +; MUBUF-NEXT: v_readlane_b32 s98, v39, 27 +; MUBUF-NEXT: v_readlane_b32 s97, v39, 26 +; MUBUF-NEXT: v_readlane_b32 s96, v39, 25 +; MUBUF-NEXT: v_readlane_b32 s87, v39, 24 +; MUBUF-NEXT: v_readlane_b32 s86, v39, 23 +; MUBUF-NEXT: v_readlane_b32 s85, v39, 22 +; MUBUF-NEXT: v_readlane_b32 s84, v39, 21 +; MUBUF-NEXT: v_readlane_b32 s83, v39, 20 +; MUBUF-NEXT: v_readlane_b32 s82, v39, 19 +; MUBUF-NEXT: v_readlane_b32 s81, v39, 18 +; MUBUF-NEXT: v_readlane_b32 s80, v39, 17 +; MUBUF-NEXT: v_readlane_b32 s71, v39, 16 +; MUBUF-NEXT: v_readlane_b32 s70, v39, 15 +; MUBUF-NEXT: v_readlane_b32 s69, v39, 14 +; MUBUF-NEXT: v_readlane_b32 s68, v39, 13 +; MUBUF-NEXT: v_readlane_b32 s67, v39, 12 +; MUBUF-NEXT: v_readlane_b32 s66, v39, 11 +; MUBUF-NEXT: v_readlane_b32 s65, v39, 10 +; MUBUF-NEXT: v_readlane_b32 s64, v39, 9 +; MUBUF-NEXT: v_readlane_b32 s55, v39, 8 +; MUBUF-NEXT: v_readlane_b32 s54, v39, 7 +; MUBUF-NEXT: v_readlane_b32 s53, v39, 6 +; MUBUF-NEXT: v_readlane_b32 s52, v39, 5 +; MUBUF-NEXT: v_readlane_b32 s51, v39, 4 +; MUBUF-NEXT: v_readlane_b32 s50, v39, 3 +; MUBUF-NEXT: v_readlane_b32 s49, v39, 2 +; MUBUF-NEXT: v_readlane_b32 s48, v39, 1 +; MUBUF-NEXT: v_readlane_b32 s39, v39, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 -; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; MUBUF-NEXT: v_readlane_b32 s4, v39, 32 +; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_load_dword v39, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: s_mov_b32 s33, s38 +; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: s_mov_b32 s33, s4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; @@ -1423,79 +1407,77 @@ define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 { ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v39, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: v_writelane_b32 v39, s46, 0 -; FLATSCR-NEXT: v_writelane_b32 v39, s47, 1 -; FLATSCR-NEXT: v_writelane_b32 v39, s48, 2 -; FLATSCR-NEXT: v_writelane_b32 v39, s49, 3 -; FLATSCR-NEXT: v_writelane_b32 v39, s50, 4 -; FLATSCR-NEXT: v_writelane_b32 v39, s51, 5 -; FLATSCR-NEXT: v_writelane_b32 v39, s52, 6 -; FLATSCR-NEXT: v_writelane_b32 v39, s53, 7 -; FLATSCR-NEXT: v_writelane_b32 v39, s62, 8 -; FLATSCR-NEXT: v_writelane_b32 v39, s63, 9 -; FLATSCR-NEXT: v_writelane_b32 v39, s64, 10 -; FLATSCR-NEXT: v_writelane_b32 v39, s65, 11 -; FLATSCR-NEXT: v_writelane_b32 v39, s66, 12 -; FLATSCR-NEXT: v_writelane_b32 v39, s67, 13 -; FLATSCR-NEXT: v_writelane_b32 v39, s68, 14 -; FLATSCR-NEXT: v_writelane_b32 v39, s69, 15 -; FLATSCR-NEXT: v_writelane_b32 v39, s78, 16 -; FLATSCR-NEXT: v_writelane_b32 v39, s79, 17 -; FLATSCR-NEXT: v_writelane_b32 v39, s80, 18 -; FLATSCR-NEXT: v_writelane_b32 v39, s81, 19 -; FLATSCR-NEXT: v_writelane_b32 v39, s82, 20 -; FLATSCR-NEXT: v_writelane_b32 v39, s83, 21 -; FLATSCR-NEXT: v_writelane_b32 v39, s84, 22 -; FLATSCR-NEXT: v_writelane_b32 v39, s85, 23 -; FLATSCR-NEXT: v_writelane_b32 v39, s94, 24 -; FLATSCR-NEXT: v_writelane_b32 v39, s95, 25 -; FLATSCR-NEXT: v_writelane_b32 v39, s96, 26 -; FLATSCR-NEXT: v_writelane_b32 v39, s97, 27 -; FLATSCR-NEXT: v_writelane_b32 v39, s98, 28 -; FLATSCR-NEXT: v_writelane_b32 v39, s99, 29 -; FLATSCR-NEXT: v_writelane_b32 v39, s100, 30 -; FLATSCR-NEXT: v_writelane_b32 v39, s101, 31 +; FLATSCR-NEXT: v_writelane_b32 v39, s39, 0 +; FLATSCR-NEXT: v_writelane_b32 v39, s48, 1 +; FLATSCR-NEXT: v_writelane_b32 v39, s49, 2 +; FLATSCR-NEXT: v_writelane_b32 v39, s50, 3 +; FLATSCR-NEXT: v_writelane_b32 v39, s51, 4 +; FLATSCR-NEXT: v_writelane_b32 v39, s52, 5 +; FLATSCR-NEXT: v_writelane_b32 v39, s53, 6 +; FLATSCR-NEXT: v_writelane_b32 v39, s54, 7 +; FLATSCR-NEXT: v_writelane_b32 v39, s55, 8 +; FLATSCR-NEXT: v_writelane_b32 v39, s64, 9 +; FLATSCR-NEXT: v_writelane_b32 v39, s65, 10 +; FLATSCR-NEXT: v_writelane_b32 v39, s66, 11 +; FLATSCR-NEXT: v_writelane_b32 v39, s67, 12 +; FLATSCR-NEXT: v_writelane_b32 v39, s68, 13 +; FLATSCR-NEXT: v_writelane_b32 v39, s69, 14 +; FLATSCR-NEXT: v_writelane_b32 v39, s70, 15 +; FLATSCR-NEXT: v_writelane_b32 v39, s71, 16 +; FLATSCR-NEXT: v_writelane_b32 v39, s80, 17 +; FLATSCR-NEXT: v_writelane_b32 v39, s81, 18 +; FLATSCR-NEXT: v_writelane_b32 v39, s82, 19 +; FLATSCR-NEXT: v_writelane_b32 v39, s83, 20 +; FLATSCR-NEXT: v_writelane_b32 v39, s84, 21 +; FLATSCR-NEXT: v_writelane_b32 v39, s85, 22 +; FLATSCR-NEXT: v_writelane_b32 v39, s86, 23 +; FLATSCR-NEXT: v_writelane_b32 v39, s87, 24 +; FLATSCR-NEXT: v_writelane_b32 v39, s96, 25 +; FLATSCR-NEXT: v_writelane_b32 v39, s97, 26 +; FLATSCR-NEXT: v_writelane_b32 v39, s98, 27 +; FLATSCR-NEXT: v_writelane_b32 v39, s99, 28 +; FLATSCR-NEXT: v_writelane_b32 v39, s100, 29 +; FLATSCR-NEXT: v_writelane_b32 v39, s101, 30 ; FLATSCR-NEXT: s_add_i32 s32, s32, 8 -; FLATSCR-NEXT: v_writelane_b32 v39, s102, 32 +; FLATSCR-NEXT: v_writelane_b32 v39, s102, 31 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber all VGPRs except CSR v40 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s102, v39, 32 -; FLATSCR-NEXT: v_readlane_b32 s101, v39, 31 -; FLATSCR-NEXT: v_readlane_b32 s100, v39, 30 -; FLATSCR-NEXT: v_readlane_b32 s99, v39, 29 -; FLATSCR-NEXT: v_readlane_b32 s98, v39, 28 -; FLATSCR-NEXT: v_readlane_b32 s97, v39, 27 -; FLATSCR-NEXT: v_readlane_b32 s96, v39, 26 -; FLATSCR-NEXT: v_readlane_b32 s95, v39, 25 -; FLATSCR-NEXT: v_readlane_b32 s94, v39, 24 -; FLATSCR-NEXT: v_readlane_b32 s85, v39, 23 -; FLATSCR-NEXT: v_readlane_b32 s84, v39, 22 -; FLATSCR-NEXT: v_readlane_b32 s83, v39, 21 -; FLATSCR-NEXT: v_readlane_b32 s82, v39, 20 -; FLATSCR-NEXT: v_readlane_b32 s81, v39, 19 -; FLATSCR-NEXT: v_readlane_b32 s80, v39, 18 -; FLATSCR-NEXT: v_readlane_b32 s79, v39, 17 -; FLATSCR-NEXT: v_readlane_b32 s78, v39, 16 -; FLATSCR-NEXT: v_readlane_b32 s69, v39, 15 -; FLATSCR-NEXT: v_readlane_b32 s68, v39, 14 -; FLATSCR-NEXT: v_readlane_b32 s67, v39, 13 -; FLATSCR-NEXT: v_readlane_b32 s66, v39, 12 -; FLATSCR-NEXT: v_readlane_b32 s65, v39, 11 -; FLATSCR-NEXT: v_readlane_b32 s64, v39, 10 -; FLATSCR-NEXT: v_readlane_b32 s63, v39, 9 -; FLATSCR-NEXT: v_readlane_b32 s62, v39, 8 -; FLATSCR-NEXT: v_readlane_b32 s53, v39, 7 -; FLATSCR-NEXT: v_readlane_b32 s52, v39, 6 -; FLATSCR-NEXT: v_readlane_b32 s51, v39, 5 -; FLATSCR-NEXT: v_readlane_b32 s50, v39, 4 -; FLATSCR-NEXT: v_readlane_b32 s49, v39, 3 -; FLATSCR-NEXT: v_readlane_b32 s48, v39, 2 -; FLATSCR-NEXT: v_readlane_b32 s47, v39, 1 -; FLATSCR-NEXT: v_readlane_b32 s46, v39, 0 +; FLATSCR-NEXT: v_readlane_b32 s102, v39, 31 +; FLATSCR-NEXT: v_readlane_b32 s101, v39, 30 +; FLATSCR-NEXT: v_readlane_b32 s100, v39, 29 +; FLATSCR-NEXT: v_readlane_b32 s99, v39, 28 +; FLATSCR-NEXT: v_readlane_b32 s98, v39, 27 +; FLATSCR-NEXT: v_readlane_b32 s97, v39, 26 +; FLATSCR-NEXT: v_readlane_b32 s96, v39, 25 +; FLATSCR-NEXT: v_readlane_b32 s87, v39, 24 +; FLATSCR-NEXT: v_readlane_b32 s86, v39, 23 +; FLATSCR-NEXT: v_readlane_b32 s85, v39, 22 +; FLATSCR-NEXT: v_readlane_b32 s84, v39, 21 +; FLATSCR-NEXT: v_readlane_b32 s83, v39, 20 +; FLATSCR-NEXT: v_readlane_b32 s82, v39, 19 +; FLATSCR-NEXT: v_readlane_b32 s81, v39, 18 +; FLATSCR-NEXT: v_readlane_b32 s80, v39, 17 +; FLATSCR-NEXT: v_readlane_b32 s71, v39, 16 +; FLATSCR-NEXT: v_readlane_b32 s70, v39, 15 +; FLATSCR-NEXT: v_readlane_b32 s69, v39, 14 +; FLATSCR-NEXT: v_readlane_b32 s68, v39, 13 +; FLATSCR-NEXT: v_readlane_b32 s67, v39, 12 +; FLATSCR-NEXT: v_readlane_b32 s66, v39, 11 +; FLATSCR-NEXT: v_readlane_b32 s65, v39, 10 +; FLATSCR-NEXT: v_readlane_b32 s64, v39, 9 +; FLATSCR-NEXT: v_readlane_b32 s55, v39, 8 +; FLATSCR-NEXT: v_readlane_b32 s54, v39, 7 +; FLATSCR-NEXT: v_readlane_b32 s53, v39, 6 +; FLATSCR-NEXT: v_readlane_b32 s52, v39, 5 +; FLATSCR-NEXT: v_readlane_b32 s51, v39, 4 +; FLATSCR-NEXT: v_readlane_b32 s50, v39, 3 +; FLATSCR-NEXT: v_readlane_b32 s49, v39, 2 +; FLATSCR-NEXT: v_readlane_b32 s48, v39, 1 +; FLATSCR-NEXT: v_readlane_b32 s39, v39, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_load_dword v39, off, s33 ; 4-byte Folded Reload @@ -1531,89 +1513,89 @@ define void @callee_need_to_spill_fp_to_reg() #1 { ; MUBUF-LABEL: callee_need_to_spill_fp_to_reg: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 s38, s33 +; MUBUF-NEXT: s_mov_b32 s4, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 -; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 +; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: v_writelane_b32 v40, s46, 0 -; MUBUF-NEXT: v_writelane_b32 v40, s47, 1 -; MUBUF-NEXT: v_writelane_b32 v40, s48, 2 -; MUBUF-NEXT: v_writelane_b32 v40, s49, 3 -; MUBUF-NEXT: v_writelane_b32 v40, s50, 4 -; MUBUF-NEXT: v_writelane_b32 v40, s51, 5 -; MUBUF-NEXT: v_writelane_b32 v40, s52, 6 -; MUBUF-NEXT: v_writelane_b32 v40, s53, 7 -; MUBUF-NEXT: v_writelane_b32 v40, s62, 8 -; MUBUF-NEXT: v_writelane_b32 v40, s63, 9 -; MUBUF-NEXT: v_writelane_b32 v40, s64, 10 -; MUBUF-NEXT: v_writelane_b32 v40, s65, 11 -; MUBUF-NEXT: v_writelane_b32 v40, s66, 12 -; MUBUF-NEXT: v_writelane_b32 v40, s67, 13 -; MUBUF-NEXT: v_writelane_b32 v40, s68, 14 -; MUBUF-NEXT: v_writelane_b32 v40, s69, 15 -; MUBUF-NEXT: v_writelane_b32 v40, s78, 16 -; MUBUF-NEXT: v_writelane_b32 v40, s79, 17 -; MUBUF-NEXT: v_writelane_b32 v40, s80, 18 -; MUBUF-NEXT: v_writelane_b32 v40, s81, 19 -; MUBUF-NEXT: v_writelane_b32 v40, s82, 20 -; MUBUF-NEXT: v_writelane_b32 v40, s83, 21 -; MUBUF-NEXT: v_writelane_b32 v40, s84, 22 -; MUBUF-NEXT: v_writelane_b32 v40, s85, 23 -; MUBUF-NEXT: v_writelane_b32 v40, s94, 24 -; MUBUF-NEXT: v_writelane_b32 v40, s95, 25 -; MUBUF-NEXT: v_writelane_b32 v40, s96, 26 -; MUBUF-NEXT: v_writelane_b32 v40, s97, 27 -; MUBUF-NEXT: v_writelane_b32 v40, s98, 28 -; MUBUF-NEXT: v_writelane_b32 v40, s99, 29 -; MUBUF-NEXT: v_writelane_b32 v40, s100, 30 -; MUBUF-NEXT: v_writelane_b32 v40, s101, 31 +; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: v_writelane_b32 v40, s4, 32 +; MUBUF-NEXT: v_writelane_b32 v40, s39, 0 +; MUBUF-NEXT: v_writelane_b32 v40, s48, 1 +; MUBUF-NEXT: v_writelane_b32 v40, s49, 2 +; MUBUF-NEXT: v_writelane_b32 v40, s50, 3 +; MUBUF-NEXT: v_writelane_b32 v40, s51, 4 +; MUBUF-NEXT: v_writelane_b32 v40, s52, 5 +; MUBUF-NEXT: v_writelane_b32 v40, s53, 6 +; MUBUF-NEXT: v_writelane_b32 v40, s54, 7 +; MUBUF-NEXT: v_writelane_b32 v40, s55, 8 +; MUBUF-NEXT: v_writelane_b32 v40, s64, 9 +; MUBUF-NEXT: v_writelane_b32 v40, s65, 10 +; MUBUF-NEXT: v_writelane_b32 v40, s66, 11 +; MUBUF-NEXT: v_writelane_b32 v40, s67, 12 +; MUBUF-NEXT: v_writelane_b32 v40, s68, 13 +; MUBUF-NEXT: v_writelane_b32 v40, s69, 14 +; MUBUF-NEXT: v_writelane_b32 v40, s70, 15 +; MUBUF-NEXT: v_writelane_b32 v40, s71, 16 +; MUBUF-NEXT: v_writelane_b32 v40, s80, 17 +; MUBUF-NEXT: v_writelane_b32 v40, s81, 18 +; MUBUF-NEXT: v_writelane_b32 v40, s82, 19 +; MUBUF-NEXT: v_writelane_b32 v40, s83, 20 +; MUBUF-NEXT: v_writelane_b32 v40, s84, 21 +; MUBUF-NEXT: v_writelane_b32 v40, s85, 22 +; MUBUF-NEXT: v_writelane_b32 v40, s86, 23 +; MUBUF-NEXT: v_writelane_b32 v40, s87, 24 +; MUBUF-NEXT: v_writelane_b32 v40, s96, 25 +; MUBUF-NEXT: v_writelane_b32 v40, s97, 26 +; MUBUF-NEXT: v_writelane_b32 v40, s98, 27 +; MUBUF-NEXT: v_writelane_b32 v40, s99, 28 +; MUBUF-NEXT: v_writelane_b32 v40, s100, 29 +; MUBUF-NEXT: v_writelane_b32 v40, s101, 30 ; MUBUF-NEXT: s_addk_i32 s32, 0x200 -; MUBUF-NEXT: v_writelane_b32 v40, s102, 32 +; MUBUF-NEXT: v_writelane_b32 v40, s102, 31 ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs ; MUBUF-NEXT: ;;#ASMEND ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber all VGPRs except CSR v40 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s102, v40, 32 -; MUBUF-NEXT: v_readlane_b32 s101, v40, 31 -; MUBUF-NEXT: v_readlane_b32 s100, v40, 30 -; MUBUF-NEXT: v_readlane_b32 s99, v40, 29 -; MUBUF-NEXT: v_readlane_b32 s98, v40, 28 -; MUBUF-NEXT: v_readlane_b32 s97, v40, 27 -; MUBUF-NEXT: v_readlane_b32 s96, v40, 26 -; MUBUF-NEXT: v_readlane_b32 s95, v40, 25 -; MUBUF-NEXT: v_readlane_b32 s94, v40, 24 -; MUBUF-NEXT: v_readlane_b32 s85, v40, 23 -; MUBUF-NEXT: v_readlane_b32 s84, v40, 22 -; MUBUF-NEXT: v_readlane_b32 s83, v40, 21 -; MUBUF-NEXT: v_readlane_b32 s82, v40, 20 -; MUBUF-NEXT: v_readlane_b32 s81, v40, 19 -; MUBUF-NEXT: v_readlane_b32 s80, v40, 18 -; MUBUF-NEXT: v_readlane_b32 s79, v40, 17 -; MUBUF-NEXT: v_readlane_b32 s78, v40, 16 -; MUBUF-NEXT: v_readlane_b32 s69, v40, 15 -; MUBUF-NEXT: v_readlane_b32 s68, v40, 14 -; MUBUF-NEXT: v_readlane_b32 s67, v40, 13 -; MUBUF-NEXT: v_readlane_b32 s66, v40, 12 -; MUBUF-NEXT: v_readlane_b32 s65, v40, 11 -; MUBUF-NEXT: v_readlane_b32 s64, v40, 10 -; MUBUF-NEXT: v_readlane_b32 s63, v40, 9 -; MUBUF-NEXT: v_readlane_b32 s62, v40, 8 -; MUBUF-NEXT: v_readlane_b32 s53, v40, 7 -; MUBUF-NEXT: v_readlane_b32 s52, v40, 6 -; MUBUF-NEXT: v_readlane_b32 s51, v40, 5 -; MUBUF-NEXT: v_readlane_b32 s50, v40, 4 -; MUBUF-NEXT: v_readlane_b32 s49, v40, 3 -; MUBUF-NEXT: v_readlane_b32 s48, v40, 2 -; MUBUF-NEXT: v_readlane_b32 s47, v40, 1 -; MUBUF-NEXT: v_readlane_b32 s46, v40, 0 +; MUBUF-NEXT: v_readlane_b32 s102, v40, 31 +; MUBUF-NEXT: v_readlane_b32 s101, v40, 30 +; MUBUF-NEXT: v_readlane_b32 s100, v40, 29 +; MUBUF-NEXT: v_readlane_b32 s99, v40, 28 +; MUBUF-NEXT: v_readlane_b32 s98, v40, 27 +; MUBUF-NEXT: v_readlane_b32 s97, v40, 26 +; MUBUF-NEXT: v_readlane_b32 s96, v40, 25 +; MUBUF-NEXT: v_readlane_b32 s87, v40, 24 +; MUBUF-NEXT: v_readlane_b32 s86, v40, 23 +; MUBUF-NEXT: v_readlane_b32 s85, v40, 22 +; MUBUF-NEXT: v_readlane_b32 s84, v40, 21 +; MUBUF-NEXT: v_readlane_b32 s83, v40, 20 +; MUBUF-NEXT: v_readlane_b32 s82, v40, 19 +; MUBUF-NEXT: v_readlane_b32 s81, v40, 18 +; MUBUF-NEXT: v_readlane_b32 s80, v40, 17 +; MUBUF-NEXT: v_readlane_b32 s71, v40, 16 +; MUBUF-NEXT: v_readlane_b32 s70, v40, 15 +; MUBUF-NEXT: v_readlane_b32 s69, v40, 14 +; MUBUF-NEXT: v_readlane_b32 s68, v40, 13 +; MUBUF-NEXT: v_readlane_b32 s67, v40, 12 +; MUBUF-NEXT: v_readlane_b32 s66, v40, 11 +; MUBUF-NEXT: v_readlane_b32 s65, v40, 10 +; MUBUF-NEXT: v_readlane_b32 s64, v40, 9 +; MUBUF-NEXT: v_readlane_b32 s55, v40, 8 +; MUBUF-NEXT: v_readlane_b32 s54, v40, 7 +; MUBUF-NEXT: v_readlane_b32 s53, v40, 6 +; MUBUF-NEXT: v_readlane_b32 s52, v40, 5 +; MUBUF-NEXT: v_readlane_b32 s51, v40, 4 +; MUBUF-NEXT: v_readlane_b32 s50, v40, 3 +; MUBUF-NEXT: v_readlane_b32 s49, v40, 2 +; MUBUF-NEXT: v_readlane_b32 s48, v40, 1 +; MUBUF-NEXT: v_readlane_b32 s39, v40, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 -; MUBUF-NEXT: s_or_saveexec_b64 s[4:5], -1 +; MUBUF-NEXT: v_readlane_b32 s4, v40, 32 +; MUBUF-NEXT: s_or_saveexec_b64 s[6:7], -1 ; MUBUF-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: s_mov_b32 s33, s38 +; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: s_mov_b32 s33, s4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; @@ -1625,79 +1607,77 @@ define void @callee_need_to_spill_fp_to_reg() #1 { ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_store_dword off, v40, s33 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: v_writelane_b32 v40, s46, 0 -; FLATSCR-NEXT: v_writelane_b32 v40, s47, 1 -; FLATSCR-NEXT: v_writelane_b32 v40, s48, 2 -; FLATSCR-NEXT: v_writelane_b32 v40, s49, 3 -; FLATSCR-NEXT: v_writelane_b32 v40, s50, 4 -; FLATSCR-NEXT: v_writelane_b32 v40, s51, 5 -; FLATSCR-NEXT: v_writelane_b32 v40, s52, 6 -; FLATSCR-NEXT: v_writelane_b32 v40, s53, 7 -; FLATSCR-NEXT: v_writelane_b32 v40, s62, 8 -; FLATSCR-NEXT: v_writelane_b32 v40, s63, 9 -; FLATSCR-NEXT: v_writelane_b32 v40, s64, 10 -; FLATSCR-NEXT: v_writelane_b32 v40, s65, 11 -; FLATSCR-NEXT: v_writelane_b32 v40, s66, 12 -; FLATSCR-NEXT: v_writelane_b32 v40, s67, 13 -; FLATSCR-NEXT: v_writelane_b32 v40, s68, 14 -; FLATSCR-NEXT: v_writelane_b32 v40, s69, 15 -; FLATSCR-NEXT: v_writelane_b32 v40, s78, 16 -; FLATSCR-NEXT: v_writelane_b32 v40, s79, 17 -; FLATSCR-NEXT: v_writelane_b32 v40, s80, 18 -; FLATSCR-NEXT: v_writelane_b32 v40, s81, 19 -; FLATSCR-NEXT: v_writelane_b32 v40, s82, 20 -; FLATSCR-NEXT: v_writelane_b32 v40, s83, 21 -; FLATSCR-NEXT: v_writelane_b32 v40, s84, 22 -; FLATSCR-NEXT: v_writelane_b32 v40, s85, 23 -; FLATSCR-NEXT: v_writelane_b32 v40, s94, 24 -; FLATSCR-NEXT: v_writelane_b32 v40, s95, 25 -; FLATSCR-NEXT: v_writelane_b32 v40, s96, 26 -; FLATSCR-NEXT: v_writelane_b32 v40, s97, 27 -; FLATSCR-NEXT: v_writelane_b32 v40, s98, 28 -; FLATSCR-NEXT: v_writelane_b32 v40, s99, 29 -; FLATSCR-NEXT: v_writelane_b32 v40, s100, 30 -; FLATSCR-NEXT: v_writelane_b32 v40, s101, 31 +; FLATSCR-NEXT: v_writelane_b32 v40, s39, 0 +; FLATSCR-NEXT: v_writelane_b32 v40, s48, 1 +; FLATSCR-NEXT: v_writelane_b32 v40, s49, 2 +; FLATSCR-NEXT: v_writelane_b32 v40, s50, 3 +; FLATSCR-NEXT: v_writelane_b32 v40, s51, 4 +; FLATSCR-NEXT: v_writelane_b32 v40, s52, 5 +; FLATSCR-NEXT: v_writelane_b32 v40, s53, 6 +; FLATSCR-NEXT: v_writelane_b32 v40, s54, 7 +; FLATSCR-NEXT: v_writelane_b32 v40, s55, 8 +; FLATSCR-NEXT: v_writelane_b32 v40, s64, 9 +; FLATSCR-NEXT: v_writelane_b32 v40, s65, 10 +; FLATSCR-NEXT: v_writelane_b32 v40, s66, 11 +; FLATSCR-NEXT: v_writelane_b32 v40, s67, 12 +; FLATSCR-NEXT: v_writelane_b32 v40, s68, 13 +; FLATSCR-NEXT: v_writelane_b32 v40, s69, 14 +; FLATSCR-NEXT: v_writelane_b32 v40, s70, 15 +; FLATSCR-NEXT: v_writelane_b32 v40, s71, 16 +; FLATSCR-NEXT: v_writelane_b32 v40, s80, 17 +; FLATSCR-NEXT: v_writelane_b32 v40, s81, 18 +; FLATSCR-NEXT: v_writelane_b32 v40, s82, 19 +; FLATSCR-NEXT: v_writelane_b32 v40, s83, 20 +; FLATSCR-NEXT: v_writelane_b32 v40, s84, 21 +; FLATSCR-NEXT: v_writelane_b32 v40, s85, 22 +; FLATSCR-NEXT: v_writelane_b32 v40, s86, 23 +; FLATSCR-NEXT: v_writelane_b32 v40, s87, 24 +; FLATSCR-NEXT: v_writelane_b32 v40, s96, 25 +; FLATSCR-NEXT: v_writelane_b32 v40, s97, 26 +; FLATSCR-NEXT: v_writelane_b32 v40, s98, 27 +; FLATSCR-NEXT: v_writelane_b32 v40, s99, 28 +; FLATSCR-NEXT: v_writelane_b32 v40, s100, 29 +; FLATSCR-NEXT: v_writelane_b32 v40, s101, 30 ; FLATSCR-NEXT: s_add_i32 s32, s32, 8 -; FLATSCR-NEXT: v_writelane_b32 v40, s102, 32 +; FLATSCR-NEXT: v_writelane_b32 v40, s102, 31 ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber nonpreserved SGPRs and 64 CSRs ; FLATSCR-NEXT: ;;#ASMEND ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber all VGPRs except CSR v40 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s102, v40, 32 -; FLATSCR-NEXT: v_readlane_b32 s101, v40, 31 -; FLATSCR-NEXT: v_readlane_b32 s100, v40, 30 -; FLATSCR-NEXT: v_readlane_b32 s99, v40, 29 -; FLATSCR-NEXT: v_readlane_b32 s98, v40, 28 -; FLATSCR-NEXT: v_readlane_b32 s97, v40, 27 -; FLATSCR-NEXT: v_readlane_b32 s96, v40, 26 -; FLATSCR-NEXT: v_readlane_b32 s95, v40, 25 -; FLATSCR-NEXT: v_readlane_b32 s94, v40, 24 -; FLATSCR-NEXT: v_readlane_b32 s85, v40, 23 -; FLATSCR-NEXT: v_readlane_b32 s84, v40, 22 -; FLATSCR-NEXT: v_readlane_b32 s83, v40, 21 -; FLATSCR-NEXT: v_readlane_b32 s82, v40, 20 -; FLATSCR-NEXT: v_readlane_b32 s81, v40, 19 -; FLATSCR-NEXT: v_readlane_b32 s80, v40, 18 -; FLATSCR-NEXT: v_readlane_b32 s79, v40, 17 -; FLATSCR-NEXT: v_readlane_b32 s78, v40, 16 -; FLATSCR-NEXT: v_readlane_b32 s69, v40, 15 -; FLATSCR-NEXT: v_readlane_b32 s68, v40, 14 -; FLATSCR-NEXT: v_readlane_b32 s67, v40, 13 -; FLATSCR-NEXT: v_readlane_b32 s66, v40, 12 -; FLATSCR-NEXT: v_readlane_b32 s65, v40, 11 -; FLATSCR-NEXT: v_readlane_b32 s64, v40, 10 -; FLATSCR-NEXT: v_readlane_b32 s63, v40, 9 -; FLATSCR-NEXT: v_readlane_b32 s62, v40, 8 -; FLATSCR-NEXT: v_readlane_b32 s53, v40, 7 -; FLATSCR-NEXT: v_readlane_b32 s52, v40, 6 -; FLATSCR-NEXT: v_readlane_b32 s51, v40, 5 -; FLATSCR-NEXT: v_readlane_b32 s50, v40, 4 -; FLATSCR-NEXT: v_readlane_b32 s49, v40, 3 -; FLATSCR-NEXT: v_readlane_b32 s48, v40, 2 -; FLATSCR-NEXT: v_readlane_b32 s47, v40, 1 -; FLATSCR-NEXT: v_readlane_b32 s46, v40, 0 +; FLATSCR-NEXT: v_readlane_b32 s102, v40, 31 +; FLATSCR-NEXT: v_readlane_b32 s101, v40, 30 +; FLATSCR-NEXT: v_readlane_b32 s100, v40, 29 +; FLATSCR-NEXT: v_readlane_b32 s99, v40, 28 +; FLATSCR-NEXT: v_readlane_b32 s98, v40, 27 +; FLATSCR-NEXT: v_readlane_b32 s97, v40, 26 +; FLATSCR-NEXT: v_readlane_b32 s96, v40, 25 +; FLATSCR-NEXT: v_readlane_b32 s87, v40, 24 +; FLATSCR-NEXT: v_readlane_b32 s86, v40, 23 +; FLATSCR-NEXT: v_readlane_b32 s85, v40, 22 +; FLATSCR-NEXT: v_readlane_b32 s84, v40, 21 +; FLATSCR-NEXT: v_readlane_b32 s83, v40, 20 +; FLATSCR-NEXT: v_readlane_b32 s82, v40, 19 +; FLATSCR-NEXT: v_readlane_b32 s81, v40, 18 +; FLATSCR-NEXT: v_readlane_b32 s80, v40, 17 +; FLATSCR-NEXT: v_readlane_b32 s71, v40, 16 +; FLATSCR-NEXT: v_readlane_b32 s70, v40, 15 +; FLATSCR-NEXT: v_readlane_b32 s69, v40, 14 +; FLATSCR-NEXT: v_readlane_b32 s68, v40, 13 +; FLATSCR-NEXT: v_readlane_b32 s67, v40, 12 +; FLATSCR-NEXT: v_readlane_b32 s66, v40, 11 +; FLATSCR-NEXT: v_readlane_b32 s65, v40, 10 +; FLATSCR-NEXT: v_readlane_b32 s64, v40, 9 +; FLATSCR-NEXT: v_readlane_b32 s55, v40, 8 +; FLATSCR-NEXT: v_readlane_b32 s54, v40, 7 +; FLATSCR-NEXT: v_readlane_b32 s53, v40, 6 +; FLATSCR-NEXT: v_readlane_b32 s52, v40, 5 +; FLATSCR-NEXT: v_readlane_b32 s51, v40, 4 +; FLATSCR-NEXT: v_readlane_b32 s50, v40, 3 +; FLATSCR-NEXT: v_readlane_b32 s49, v40, 2 +; FLATSCR-NEXT: v_readlane_b32 s48, v40, 1 +; FLATSCR-NEXT: v_readlane_b32 s39, v40, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_or_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload @@ -1731,48 +1711,48 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; MUBUF-LABEL: spill_fp_to_memory_scratch_reg_needed_mubuf_offset: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: s_mov_b32 s38, s33 +; MUBUF-NEXT: s_mov_b32 s4, s33 ; MUBUF-NEXT: s_mov_b32 s33, s32 -; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100 -; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s6 ; 4-byte Folded Spill -; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: v_writelane_b32 v39, s46, 0 -; MUBUF-NEXT: v_writelane_b32 v39, s47, 1 -; MUBUF-NEXT: v_writelane_b32 v39, s48, 2 -; MUBUF-NEXT: v_writelane_b32 v39, s49, 3 -; MUBUF-NEXT: v_writelane_b32 v39, s50, 4 -; MUBUF-NEXT: v_writelane_b32 v39, s51, 5 -; MUBUF-NEXT: v_writelane_b32 v39, s52, 6 -; MUBUF-NEXT: v_writelane_b32 v39, s53, 7 -; MUBUF-NEXT: v_writelane_b32 v39, s62, 8 -; MUBUF-NEXT: v_writelane_b32 v39, s63, 9 -; MUBUF-NEXT: v_writelane_b32 v39, s64, 10 -; MUBUF-NEXT: v_writelane_b32 v39, s65, 11 -; MUBUF-NEXT: v_writelane_b32 v39, s66, 12 -; MUBUF-NEXT: v_writelane_b32 v39, s67, 13 -; MUBUF-NEXT: v_writelane_b32 v39, s68, 14 -; MUBUF-NEXT: v_writelane_b32 v39, s69, 15 -; MUBUF-NEXT: v_writelane_b32 v39, s78, 16 -; MUBUF-NEXT: v_writelane_b32 v39, s79, 17 -; MUBUF-NEXT: v_writelane_b32 v39, s80, 18 -; MUBUF-NEXT: v_writelane_b32 v39, s81, 19 -; MUBUF-NEXT: v_writelane_b32 v39, s82, 20 -; MUBUF-NEXT: v_writelane_b32 v39, s83, 21 -; MUBUF-NEXT: v_writelane_b32 v39, s84, 22 -; MUBUF-NEXT: v_writelane_b32 v39, s85, 23 -; MUBUF-NEXT: v_writelane_b32 v39, s94, 24 -; MUBUF-NEXT: v_writelane_b32 v39, s95, 25 -; MUBUF-NEXT: v_writelane_b32 v39, s96, 26 -; MUBUF-NEXT: v_writelane_b32 v39, s97, 27 -; MUBUF-NEXT: v_writelane_b32 v39, s98, 28 -; MUBUF-NEXT: v_writelane_b32 v39, s99, 29 -; MUBUF-NEXT: v_writelane_b32 v39, s100, 30 -; MUBUF-NEXT: v_writelane_b32 v39, s101, 31 +; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; MUBUF-NEXT: s_add_i32 s5, s33, 0x40100 +; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], s5 ; 4-byte Folded Spill +; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: v_writelane_b32 v39, s4, 32 +; MUBUF-NEXT: v_writelane_b32 v39, s39, 0 +; MUBUF-NEXT: v_writelane_b32 v39, s48, 1 +; MUBUF-NEXT: v_writelane_b32 v39, s49, 2 +; MUBUF-NEXT: v_writelane_b32 v39, s50, 3 +; MUBUF-NEXT: v_writelane_b32 v39, s51, 4 +; MUBUF-NEXT: v_writelane_b32 v39, s52, 5 +; MUBUF-NEXT: v_writelane_b32 v39, s53, 6 +; MUBUF-NEXT: v_writelane_b32 v39, s54, 7 +; MUBUF-NEXT: v_writelane_b32 v39, s55, 8 +; MUBUF-NEXT: v_writelane_b32 v39, s64, 9 +; MUBUF-NEXT: v_writelane_b32 v39, s65, 10 +; MUBUF-NEXT: v_writelane_b32 v39, s66, 11 +; MUBUF-NEXT: v_writelane_b32 v39, s67, 12 +; MUBUF-NEXT: v_writelane_b32 v39, s68, 13 +; MUBUF-NEXT: v_writelane_b32 v39, s69, 14 +; MUBUF-NEXT: v_writelane_b32 v39, s70, 15 +; MUBUF-NEXT: v_writelane_b32 v39, s71, 16 +; MUBUF-NEXT: v_writelane_b32 v39, s80, 17 +; MUBUF-NEXT: v_writelane_b32 v39, s81, 18 +; MUBUF-NEXT: v_writelane_b32 v39, s82, 19 +; MUBUF-NEXT: v_writelane_b32 v39, s83, 20 +; MUBUF-NEXT: v_writelane_b32 v39, s84, 21 +; MUBUF-NEXT: v_writelane_b32 v39, s85, 22 +; MUBUF-NEXT: v_writelane_b32 v39, s86, 23 +; MUBUF-NEXT: v_writelane_b32 v39, s87, 24 +; MUBUF-NEXT: v_writelane_b32 v39, s96, 25 +; MUBUF-NEXT: v_writelane_b32 v39, s97, 26 +; MUBUF-NEXT: v_writelane_b32 v39, s98, 27 +; MUBUF-NEXT: v_writelane_b32 v39, s99, 28 +; MUBUF-NEXT: v_writelane_b32 v39, s100, 29 +; MUBUF-NEXT: v_writelane_b32 v39, s101, 30 ; MUBUF-NEXT: v_mov_b32_e32 v0, 0 ; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1000 ; MUBUF-NEXT: s_add_i32 s32, s32, 0x40300 -; MUBUF-NEXT: v_writelane_b32 v39, s102, 32 +; MUBUF-NEXT: v_writelane_b32 v39, s102, 31 ; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s33 offen ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: ;;#ASMSTART @@ -1781,45 +1761,45 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; MUBUF-NEXT: ;;#ASMSTART ; MUBUF-NEXT: ; clobber all VGPRs except CSR v40 ; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_readlane_b32 s102, v39, 32 -; MUBUF-NEXT: v_readlane_b32 s101, v39, 31 -; MUBUF-NEXT: v_readlane_b32 s100, v39, 30 -; MUBUF-NEXT: v_readlane_b32 s99, v39, 29 -; MUBUF-NEXT: v_readlane_b32 s98, v39, 28 -; MUBUF-NEXT: v_readlane_b32 s97, v39, 27 -; MUBUF-NEXT: v_readlane_b32 s96, v39, 26 -; MUBUF-NEXT: v_readlane_b32 s95, v39, 25 -; MUBUF-NEXT: v_readlane_b32 s94, v39, 24 -; MUBUF-NEXT: v_readlane_b32 s85, v39, 23 -; MUBUF-NEXT: v_readlane_b32 s84, v39, 22 -; MUBUF-NEXT: v_readlane_b32 s83, v39, 21 -; MUBUF-NEXT: v_readlane_b32 s82, v39, 20 -; MUBUF-NEXT: v_readlane_b32 s81, v39, 19 -; MUBUF-NEXT: v_readlane_b32 s80, v39, 18 -; MUBUF-NEXT: v_readlane_b32 s79, v39, 17 -; MUBUF-NEXT: v_readlane_b32 s78, v39, 16 -; MUBUF-NEXT: v_readlane_b32 s69, v39, 15 -; MUBUF-NEXT: v_readlane_b32 s68, v39, 14 -; MUBUF-NEXT: v_readlane_b32 s67, v39, 13 -; MUBUF-NEXT: v_readlane_b32 s66, v39, 12 -; MUBUF-NEXT: v_readlane_b32 s65, v39, 11 -; MUBUF-NEXT: v_readlane_b32 s64, v39, 10 -; MUBUF-NEXT: v_readlane_b32 s63, v39, 9 -; MUBUF-NEXT: v_readlane_b32 s62, v39, 8 -; MUBUF-NEXT: v_readlane_b32 s53, v39, 7 -; MUBUF-NEXT: v_readlane_b32 s52, v39, 6 -; MUBUF-NEXT: v_readlane_b32 s51, v39, 5 -; MUBUF-NEXT: v_readlane_b32 s50, v39, 4 -; MUBUF-NEXT: v_readlane_b32 s49, v39, 3 -; MUBUF-NEXT: v_readlane_b32 s48, v39, 2 -; MUBUF-NEXT: v_readlane_b32 s47, v39, 1 -; MUBUF-NEXT: v_readlane_b32 s46, v39, 0 +; MUBUF-NEXT: v_readlane_b32 s102, v39, 31 +; MUBUF-NEXT: v_readlane_b32 s101, v39, 30 +; MUBUF-NEXT: v_readlane_b32 s100, v39, 29 +; MUBUF-NEXT: v_readlane_b32 s99, v39, 28 +; MUBUF-NEXT: v_readlane_b32 s98, v39, 27 +; MUBUF-NEXT: v_readlane_b32 s97, v39, 26 +; MUBUF-NEXT: v_readlane_b32 s96, v39, 25 +; MUBUF-NEXT: v_readlane_b32 s87, v39, 24 +; MUBUF-NEXT: v_readlane_b32 s86, v39, 23 +; MUBUF-NEXT: v_readlane_b32 s85, v39, 22 +; MUBUF-NEXT: v_readlane_b32 s84, v39, 21 +; MUBUF-NEXT: v_readlane_b32 s83, v39, 20 +; MUBUF-NEXT: v_readlane_b32 s82, v39, 19 +; MUBUF-NEXT: v_readlane_b32 s81, v39, 18 +; MUBUF-NEXT: v_readlane_b32 s80, v39, 17 +; MUBUF-NEXT: v_readlane_b32 s71, v39, 16 +; MUBUF-NEXT: v_readlane_b32 s70, v39, 15 +; MUBUF-NEXT: v_readlane_b32 s69, v39, 14 +; MUBUF-NEXT: v_readlane_b32 s68, v39, 13 +; MUBUF-NEXT: v_readlane_b32 s67, v39, 12 +; MUBUF-NEXT: v_readlane_b32 s66, v39, 11 +; MUBUF-NEXT: v_readlane_b32 s65, v39, 10 +; MUBUF-NEXT: v_readlane_b32 s64, v39, 9 +; MUBUF-NEXT: v_readlane_b32 s55, v39, 8 +; MUBUF-NEXT: v_readlane_b32 s54, v39, 7 +; MUBUF-NEXT: v_readlane_b32 s53, v39, 6 +; MUBUF-NEXT: v_readlane_b32 s52, v39, 5 +; MUBUF-NEXT: v_readlane_b32 s51, v39, 4 +; MUBUF-NEXT: v_readlane_b32 s50, v39, 3 +; MUBUF-NEXT: v_readlane_b32 s49, v39, 2 +; MUBUF-NEXT: v_readlane_b32 s48, v39, 1 +; MUBUF-NEXT: v_readlane_b32 s39, v39, 0 ; MUBUF-NEXT: s_mov_b32 s32, s33 -; MUBUF-NEXT: s_xor_saveexec_b64 s[4:5], -1 -; MUBUF-NEXT: s_add_i32 s6, s33, 0x40100 -; MUBUF-NEXT: buffer_load_dword v39, off, s[0:3], s6 ; 4-byte Folded Reload -; MUBUF-NEXT: s_mov_b64 exec, s[4:5] -; MUBUF-NEXT: s_mov_b32 s33, s38 +; MUBUF-NEXT: v_readlane_b32 s4, v39, 32 +; MUBUF-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; MUBUF-NEXT: s_add_i32 s5, s33, 0x40100 +; MUBUF-NEXT: buffer_load_dword v39, off, s[0:3], s5 ; 4-byte Folded Reload +; MUBUF-NEXT: s_mov_b64 exec, s[6:7] +; MUBUF-NEXT: s_mov_b32 s33, s4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_setpc_b64 s[30:31] ; @@ -1832,42 +1812,41 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1004 ; FLATSCR-NEXT: scratch_store_dword off, v39, s1 ; 4-byte Folded Spill ; FLATSCR-NEXT: s_mov_b64 exec, s[2:3] -; FLATSCR-NEXT: v_writelane_b32 v39, s46, 0 -; FLATSCR-NEXT: v_writelane_b32 v39, s47, 1 -; FLATSCR-NEXT: v_writelane_b32 v39, s48, 2 -; FLATSCR-NEXT: v_writelane_b32 v39, s49, 3 -; FLATSCR-NEXT: v_writelane_b32 v39, s50, 4 -; FLATSCR-NEXT: v_writelane_b32 v39, s51, 5 -; FLATSCR-NEXT: v_writelane_b32 v39, s52, 6 -; FLATSCR-NEXT: v_writelane_b32 v39, s53, 7 -; FLATSCR-NEXT: v_writelane_b32 v39, s62, 8 -; FLATSCR-NEXT: v_writelane_b32 v39, s63, 9 -; FLATSCR-NEXT: v_writelane_b32 v39, s64, 10 -; FLATSCR-NEXT: v_writelane_b32 v39, s65, 11 -; FLATSCR-NEXT: v_writelane_b32 v39, s66, 12 -; FLATSCR-NEXT: v_writelane_b32 v39, s67, 13 -; FLATSCR-NEXT: v_writelane_b32 v39, s68, 14 -; FLATSCR-NEXT: v_writelane_b32 v39, s69, 15 -; FLATSCR-NEXT: v_writelane_b32 v39, s78, 16 -; FLATSCR-NEXT: v_writelane_b32 v39, s79, 17 -; FLATSCR-NEXT: v_writelane_b32 v39, s80, 18 -; FLATSCR-NEXT: v_writelane_b32 v39, s81, 19 -; FLATSCR-NEXT: v_writelane_b32 v39, s82, 20 -; FLATSCR-NEXT: v_writelane_b32 v39, s83, 21 -; FLATSCR-NEXT: v_writelane_b32 v39, s84, 22 -; FLATSCR-NEXT: v_writelane_b32 v39, s85, 23 -; FLATSCR-NEXT: v_writelane_b32 v39, s94, 24 -; FLATSCR-NEXT: v_writelane_b32 v39, s95, 25 -; FLATSCR-NEXT: v_writelane_b32 v39, s96, 26 -; FLATSCR-NEXT: v_writelane_b32 v39, s97, 27 -; FLATSCR-NEXT: v_writelane_b32 v39, s98, 28 -; FLATSCR-NEXT: v_writelane_b32 v39, s99, 29 +; FLATSCR-NEXT: v_writelane_b32 v39, s39, 0 +; FLATSCR-NEXT: v_writelane_b32 v39, s48, 1 +; FLATSCR-NEXT: v_writelane_b32 v39, s49, 2 +; FLATSCR-NEXT: v_writelane_b32 v39, s50, 3 +; FLATSCR-NEXT: v_writelane_b32 v39, s51, 4 +; FLATSCR-NEXT: v_writelane_b32 v39, s52, 5 +; FLATSCR-NEXT: v_writelane_b32 v39, s53, 6 +; FLATSCR-NEXT: v_writelane_b32 v39, s54, 7 +; FLATSCR-NEXT: v_writelane_b32 v39, s55, 8 +; FLATSCR-NEXT: v_writelane_b32 v39, s64, 9 +; FLATSCR-NEXT: v_writelane_b32 v39, s65, 10 +; FLATSCR-NEXT: v_writelane_b32 v39, s66, 11 +; FLATSCR-NEXT: v_writelane_b32 v39, s67, 12 +; FLATSCR-NEXT: v_writelane_b32 v39, s68, 13 +; FLATSCR-NEXT: v_writelane_b32 v39, s69, 14 +; FLATSCR-NEXT: v_writelane_b32 v39, s70, 15 +; FLATSCR-NEXT: v_writelane_b32 v39, s71, 16 +; FLATSCR-NEXT: v_writelane_b32 v39, s80, 17 +; FLATSCR-NEXT: v_writelane_b32 v39, s81, 18 +; FLATSCR-NEXT: v_writelane_b32 v39, s82, 19 +; FLATSCR-NEXT: v_writelane_b32 v39, s83, 20 +; FLATSCR-NEXT: v_writelane_b32 v39, s84, 21 +; FLATSCR-NEXT: v_writelane_b32 v39, s85, 22 +; FLATSCR-NEXT: v_writelane_b32 v39, s86, 23 +; FLATSCR-NEXT: v_writelane_b32 v39, s87, 24 +; FLATSCR-NEXT: v_writelane_b32 v39, s96, 25 +; FLATSCR-NEXT: v_writelane_b32 v39, s97, 26 +; FLATSCR-NEXT: v_writelane_b32 v39, s98, 27 +; FLATSCR-NEXT: v_writelane_b32 v39, s99, 28 ; FLATSCR-NEXT: s_addk_i32 s32, 0x100c -; FLATSCR-NEXT: v_writelane_b32 v39, s100, 30 -; FLATSCR-NEXT: v_writelane_b32 v39, s101, 31 +; FLATSCR-NEXT: v_writelane_b32 v39, s100, 29 +; FLATSCR-NEXT: v_writelane_b32 v39, s101, 30 ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1000 -; FLATSCR-NEXT: v_writelane_b32 v39, s102, 32 +; FLATSCR-NEXT: v_writelane_b32 v39, s102, 31 ; FLATSCR-NEXT: scratch_store_dword off, v0, s1 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: ;;#ASMSTART @@ -1876,39 +1855,38 @@ define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset(ptr addrspace(5) ; FLATSCR-NEXT: ;;#ASMSTART ; FLATSCR-NEXT: ; clobber all VGPRs except CSR v40 ; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: v_readlane_b32 s102, v39, 32 -; FLATSCR-NEXT: v_readlane_b32 s101, v39, 31 -; FLATSCR-NEXT: v_readlane_b32 s100, v39, 30 -; FLATSCR-NEXT: v_readlane_b32 s99, v39, 29 -; FLATSCR-NEXT: v_readlane_b32 s98, v39, 28 -; FLATSCR-NEXT: v_readlane_b32 s97, v39, 27 -; FLATSCR-NEXT: v_readlane_b32 s96, v39, 26 -; FLATSCR-NEXT: v_readlane_b32 s95, v39, 25 -; FLATSCR-NEXT: v_readlane_b32 s94, v39, 24 -; FLATSCR-NEXT: v_readlane_b32 s85, v39, 23 -; FLATSCR-NEXT: v_readlane_b32 s84, v39, 22 -; FLATSCR-NEXT: v_readlane_b32 s83, v39, 21 -; FLATSCR-NEXT: v_readlane_b32 s82, v39, 20 -; FLATSCR-NEXT: v_readlane_b32 s81, v39, 19 -; FLATSCR-NEXT: v_readlane_b32 s80, v39, 18 -; FLATSCR-NEXT: v_readlane_b32 s79, v39, 17 -; FLATSCR-NEXT: v_readlane_b32 s78, v39, 16 -; FLATSCR-NEXT: v_readlane_b32 s69, v39, 15 -; FLATSCR-NEXT: v_readlane_b32 s68, v39, 14 -; FLATSCR-NEXT: v_readlane_b32 s67, v39, 13 -; FLATSCR-NEXT: v_readlane_b32 s66, v39, 12 -; FLATSCR-NEXT: v_readlane_b32 s65, v39, 11 -; FLATSCR-NEXT: v_readlane_b32 s64, v39, 10 -; FLATSCR-NEXT: v_readlane_b32 s63, v39, 9 -; FLATSCR-NEXT: v_readlane_b32 s62, v39, 8 -; FLATSCR-NEXT: v_readlane_b32 s53, v39, 7 -; FLATSCR-NEXT: v_readlane_b32 s52, v39, 6 -; FLATSCR-NEXT: v_readlane_b32 s51, v39, 5 -; FLATSCR-NEXT: v_readlane_b32 s50, v39, 4 -; FLATSCR-NEXT: v_readlane_b32 s49, v39, 3 -; FLATSCR-NEXT: v_readlane_b32 s48, v39, 2 -; FLATSCR-NEXT: v_readlane_b32 s47, v39, 1 -; FLATSCR-NEXT: v_readlane_b32 s46, v39, 0 +; FLATSCR-NEXT: v_readlane_b32 s102, v39, 31 +; FLATSCR-NEXT: v_readlane_b32 s101, v39, 30 +; FLATSCR-NEXT: v_readlane_b32 s100, v39, 29 +; FLATSCR-NEXT: v_readlane_b32 s99, v39, 28 +; FLATSCR-NEXT: v_readlane_b32 s98, v39, 27 +; FLATSCR-NEXT: v_readlane_b32 s97, v39, 26 +; FLATSCR-NEXT: v_readlane_b32 s96, v39, 25 +; FLATSCR-NEXT: v_readlane_b32 s87, v39, 24 +; FLATSCR-NEXT: v_readlane_b32 s86, v39, 23 +; FLATSCR-NEXT: v_readlane_b32 s85, v39, 22 +; FLATSCR-NEXT: v_readlane_b32 s84, v39, 21 +; FLATSCR-NEXT: v_readlane_b32 s83, v39, 20 +; FLATSCR-NEXT: v_readlane_b32 s82, v39, 19 +; FLATSCR-NEXT: v_readlane_b32 s81, v39, 18 +; FLATSCR-NEXT: v_readlane_b32 s80, v39, 17 +; FLATSCR-NEXT: v_readlane_b32 s71, v39, 16 +; FLATSCR-NEXT: v_readlane_b32 s70, v39, 15 +; FLATSCR-NEXT: v_readlane_b32 s69, v39, 14 +; FLATSCR-NEXT: v_readlane_b32 s68, v39, 13 +; FLATSCR-NEXT: v_readlane_b32 s67, v39, 12 +; FLATSCR-NEXT: v_readlane_b32 s66, v39, 11 +; FLATSCR-NEXT: v_readlane_b32 s65, v39, 10 +; FLATSCR-NEXT: v_readlane_b32 s64, v39, 9 +; FLATSCR-NEXT: v_readlane_b32 s55, v39, 8 +; FLATSCR-NEXT: v_readlane_b32 s54, v39, 7 +; FLATSCR-NEXT: v_readlane_b32 s53, v39, 6 +; FLATSCR-NEXT: v_readlane_b32 s52, v39, 5 +; FLATSCR-NEXT: v_readlane_b32 s51, v39, 4 +; FLATSCR-NEXT: v_readlane_b32 s50, v39, 3 +; FLATSCR-NEXT: v_readlane_b32 s49, v39, 2 +; FLATSCR-NEXT: v_readlane_b32 s48, v39, 1 +; FLATSCR-NEXT: v_readlane_b32 s39, v39, 0 ; FLATSCR-NEXT: s_mov_b32 s32, s33 ; FLATSCR-NEXT: s_xor_saveexec_b64 s[2:3], -1 ; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1004 diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir index 0a3bf35427e24..6504f48333485 100644 --- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir +++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir @@ -14,13 +14,7 @@ body: | ; CHECK-LABEL: name: def_csr_sgpr ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $sgpr46, $sgpr47 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr46, 0, $vgpr0 - ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr47, 1, $vgpr0 ; CHECK-NEXT: S_NOP 0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll index 4c2e3f426d29f..9b91a3dc9b6e4 100644 --- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll +++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll @@ -1321,19 +1321,19 @@ bb: define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspace(3) %arg) { ; CI-LABEL: ds_read_call_read: ; CI: ; %bb.0: -; CI-NEXT: s_getpc_b64 s[64:65] -; CI-NEXT: s_mov_b32 s64, s0 -; CI-NEXT: s_load_dwordx4 s[64:67], s[64:65], 0x0 +; CI-NEXT: s_getpc_b64 s[48:49] +; CI-NEXT: s_mov_b32 s48, s0 +; CI-NEXT: s_load_dwordx4 s[48:51], s[48:49], 0x0 ; CI-NEXT: s_mov_b32 s14, s10 ; CI-NEXT: v_lshlrev_b32_e32 v3, 2, v0 ; CI-NEXT: s_mov_b32 m0, -1 ; CI-NEXT: s_mov_b32 s12, s8 ; CI-NEXT: s_waitcnt lgkmcnt(0) -; CI-NEXT: s_add_u32 s64, s64, s11 +; CI-NEXT: s_add_u32 s48, s48, s11 ; CI-NEXT: s_mov_b64 s[10:11], s[6:7] -; CI-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x0 +; CI-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0 ; CI-NEXT: s_load_dword s6, s[4:5], 0x2 -; CI-NEXT: s_addc_u32 s65, s65, 0 +; CI-NEXT: s_addc_u32 s49, s49, 0 ; CI-NEXT: s_add_u32 s8, s4, 12 ; CI-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; CI-NEXT: s_mov_b32 s13, s9 @@ -1345,36 +1345,36 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac ; CI-NEXT: v_or_b32_e32 v0, v0, v1 ; CI-NEXT: s_mov_b64 s[4:5], s[0:1] ; CI-NEXT: s_mov_b64 s[6:7], s[2:3] -; CI-NEXT: s_mov_b64 s[0:1], s[64:65] +; CI-NEXT: s_mov_b64 s[0:1], s[48:49] ; CI-NEXT: s_mov_b32 s17, void_func_void@abs32@hi ; CI-NEXT: s_mov_b32 s16, void_func_void@abs32@lo ; CI-NEXT: v_or_b32_e32 v31, v0, v2 -; CI-NEXT: s_mov_b64 s[2:3], s[66:67] +; CI-NEXT: s_mov_b64 s[2:3], s[50:51] ; CI-NEXT: s_mov_b32 s32, 0 -; CI-NEXT: s_mov_b32 s51, 0xf000 -; CI-NEXT: s_mov_b32 s50, -1 +; CI-NEXT: s_mov_b32 s39, 0xf000 +; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CI-NEXT: ds_read_b32 v0, v40 offset:4 ; CI-NEXT: s_waitcnt lgkmcnt(0) ; CI-NEXT: v_add_i32_e32 v0, vcc, v41, v0 -; CI-NEXT: buffer_store_dword v0, off, s[48:51], 0 +; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: ds_read_call_read: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_getpc_b64 s[48:49] -; GFX9-NEXT: s_mov_b32 s48, s0 -; GFX9-NEXT: s_load_dwordx4 s[48:51], s[48:49], 0x0 +; GFX9-NEXT: s_getpc_b64 s[36:37] +; GFX9-NEXT: s_mov_b32 s36, s0 +; GFX9-NEXT: s_load_dwordx4 s[36:39], s[36:37], 0x0 ; GFX9-NEXT: s_mov_b32 s14, s10 ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_mov_b32 s13, s9 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_add_u32 s36, s36, s11 ; GFX9-NEXT: s_mov_b64 s[10:11], s[6:7] ; GFX9-NEXT: s_load_dword s6, s[4:5], 0x8 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_add_u32 s8, s4, 12 ; GFX9-NEXT: s_addc_u32 s9, s5, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1383,11 +1383,11 @@ define amdgpu_kernel void @ds_read_call_read(ptr addrspace(1) %out, ptr addrspac ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: s_mov_b32 s17, void_func_void@abs32@hi ; GFX9-NEXT: s_mov_b32 s16, void_func_void@abs32@lo ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: v_mov_b32_e32 v40, 0 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll index f671ea5f10cd8..40cdfd76d6af6 100644 --- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll @@ -28,8 +28,8 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: v_writelane_b32 v41, s35, 3 ; CHECK-NEXT: v_writelane_b32 v41, s36, 4 ; CHECK-NEXT: v_writelane_b32 v41, s37, 5 -; CHECK-NEXT: v_writelane_b32 v41, s46, 6 -; CHECK-NEXT: v_writelane_b32 v41, s47, 7 +; CHECK-NEXT: v_writelane_b32 v41, s38, 6 +; CHECK-NEXT: v_writelane_b32 v41, s39, 7 ; CHECK-NEXT: v_writelane_b32 v41, s48, 8 ; CHECK-NEXT: v_writelane_b32 v41, s49, 9 ; CHECK-NEXT: v_writelane_b32 v41, s50, 10 @@ -37,7 +37,7 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: v_writelane_b32 v41, s52, 12 ; CHECK-NEXT: s_addk_i32 s32, 0x400 ; CHECK-NEXT: v_writelane_b32 v41, s53, 13 -; CHECK-NEXT: v_writelane_b32 v41, s62, 14 +; CHECK-NEXT: v_writelane_b32 v41, s54, 14 ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- undef ; CHECK-NEXT: .Ltmp0: @@ -45,8 +45,8 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, __kmpc_alloc_shared@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, __kmpc_alloc_shared@gotpcrel32@hi+12 -; CHECK-NEXT: v_writelane_b32 v41, s63, 15 -; CHECK-NEXT: s_load_dwordx2 s[62:63], s[4:5], 0x0 +; CHECK-NEXT: v_writelane_b32 v41, s55, 15 +; CHECK-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill ; CHECK-NEXT: v_mov_b32_e32 v40, v31 @@ -56,11 +56,11 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] +; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_swappc_b64 s[30:31], s[62:63] +; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55] ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] ; CHECK-NEXT: s_mov_b32 s12, s53 @@ -68,23 +68,23 @@ define weak_odr void @test(i32 %0) !dbg !34 { ; CHECK-NEXT: s_mov_b32 s14, s51 ; CHECK-NEXT: s_mov_b32 s15, s50 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 -; CHECK-NEXT: s_swappc_b64 s[30:31], s[62:63] +; CHECK-NEXT: s_swappc_b64 s[30:31], s[54:55] ; CHECK-NEXT: .Ltmp1: ; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- [$vgpr0_vgpr1+0] ; CHECK-NEXT: .loc 1 0 9 is_stmt 0 ; dummy:0:9 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: flat_store_dword v[0:1], v2 -; CHECK-NEXT: v_readlane_b32 s63, v41, 15 -; CHECK-NEXT: v_readlane_b32 s62, v41, 14 +; CHECK-NEXT: v_readlane_b32 s55, v41, 15 +; CHECK-NEXT: v_readlane_b32 s54, v41, 14 ; CHECK-NEXT: v_readlane_b32 s53, v41, 13 ; CHECK-NEXT: v_readlane_b32 s52, v41, 12 ; CHECK-NEXT: v_readlane_b32 s51, v41, 11 ; CHECK-NEXT: v_readlane_b32 s50, v41, 10 ; CHECK-NEXT: v_readlane_b32 s49, v41, 9 ; CHECK-NEXT: v_readlane_b32 s48, v41, 8 -; CHECK-NEXT: v_readlane_b32 s47, v41, 7 -; CHECK-NEXT: v_readlane_b32 s46, v41, 6 +; CHECK-NEXT: v_readlane_b32 s39, v41, 7 +; CHECK-NEXT: v_readlane_b32 s38, v41, 6 ; CHECK-NEXT: v_readlane_b32 s37, v41, 5 ; CHECK-NEXT: v_readlane_b32 s36, v41, 4 ; CHECK-NEXT: v_readlane_b32 s35, v41, 3 diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir index 4dd03a17f7caa..7f370b2cca658 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir @@ -142,8 +142,8 @@ body: | ; GFX1100-NEXT: renamable $sgpr20 = S_MOV_B32 killed $sgpr22 ; GFX1100-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23 ; GFX1100-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27 - ; GFX1100-NEXT: $sgpr38 = S_ADD_I32 $sgpr32, 8, implicit-def $scc - ; GFX1100-NEXT: renamable $sgpr31 = S_MOV_B32 killed $sgpr38 + ; GFX1100-NEXT: $sgpr40 = S_ADD_I32 $sgpr32, 8, implicit-def $scc + ; GFX1100-NEXT: renamable $sgpr31 = S_MOV_B32 killed $sgpr40 ; GFX1100-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec ; GFX1100-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec ; GFX1100-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec @@ -167,8 +167,8 @@ body: | ; GFX1200-NEXT: renamable $sgpr20 = S_MOV_B32 killed $sgpr22 ; GFX1200-NEXT: undef renamable $sgpr22 = COPY killed undef renamable $sgpr22, implicit-def $sgpr22_sgpr23 ; GFX1200-NEXT: undef renamable $sgpr26 = COPY killed undef renamable $sgpr26, implicit-def $sgpr26_sgpr27 - ; GFX1200-NEXT: $sgpr38 = S_ADD_I32 $sgpr32, 8, implicit-def $scc - ; GFX1200-NEXT: renamable $sgpr31 = S_MOV_B32 killed $sgpr38 + ; GFX1200-NEXT: $sgpr40 = S_ADD_I32 $sgpr32, 8, implicit-def $scc + ; GFX1200-NEXT: renamable $sgpr31 = S_MOV_B32 killed $sgpr40 ; GFX1200-NEXT: renamable $vgpr3 = COPY killed renamable $sgpr30, implicit $exec ; GFX1200-NEXT: renamable $vgpr0_vgpr1 = COPY renamable $sgpr28_sgpr29, implicit $exec ; GFX1200-NEXT: renamable $vgpr0_vgpr1 = COPY killed renamable $vcc, implicit $exec @@ -706,7 +706,7 @@ body: | ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: $sgpr4 = S_MOV_B32 24 - ; GFX8-NEXT: $vgpr0, dead $sgpr70_sgpr71 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec + ; GFX8-NEXT: $vgpr0, dead $sgpr72_sgpr73 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec ; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 @@ -809,10 +809,10 @@ body: | ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX1100-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1100-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc - ; GFX1100-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc - ; GFX1100-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc - ; GFX1100-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70 - ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70 + ; GFX1100-NEXT: $sgpr72 = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc + ; GFX1100-NEXT: S_BITCMP1_B32 $sgpr72, 0, implicit-def $scc + ; GFX1100-NEXT: $sgpr72 = S_BITSET0_B32 0, $sgpr72 + ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr72 ; GFX1100-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1100-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1100-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -836,10 +836,10 @@ body: | ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX1200-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1200-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc - ; GFX1200-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc - ; GFX1200-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc - ; GFX1200-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70 - ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70 + ; GFX1200-NEXT: $sgpr72 = S_ADDC_U32 $sgpr32, 24, implicit-def $scc, implicit $scc + ; GFX1200-NEXT: S_BITCMP1_B32 $sgpr72, 0, implicit-def $scc + ; GFX1200-NEXT: $sgpr72 = S_BITSET0_B32 0, $sgpr72 + ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr72 ; GFX1200-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1200-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1200-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -901,7 +901,7 @@ body: | ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec ; GFX8-NEXT: $sgpr4 = S_MOV_B32 68 - ; GFX8-NEXT: $vgpr0, dead $sgpr70_sgpr71 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec + ; GFX8-NEXT: $vgpr0, dead $sgpr72_sgpr73 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec ; GFX8-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX8-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 @@ -1004,10 +1004,10 @@ body: | ; GFX1100-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX1100-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1100-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc - ; GFX1100-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc - ; GFX1100-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc - ; GFX1100-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70 - ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70 + ; GFX1100-NEXT: $sgpr72 = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc + ; GFX1100-NEXT: S_BITCMP1_B32 $sgpr72, 0, implicit-def $scc + ; GFX1100-NEXT: $sgpr72 = S_BITSET0_B32 0, $sgpr72 + ; GFX1100-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr72 ; GFX1100-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1100-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1100-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 @@ -1031,10 +1031,10 @@ body: | ; GFX1200-NEXT: S_NOP 0, implicit-def $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 ; GFX1200-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc_lo, implicit $exec ; GFX1200-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc - ; GFX1200-NEXT: $sgpr70 = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc - ; GFX1200-NEXT: S_BITCMP1_B32 $sgpr70, 0, implicit-def $scc - ; GFX1200-NEXT: $sgpr70 = S_BITSET0_B32 0, $sgpr70 - ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr70 + ; GFX1200-NEXT: $sgpr72 = S_ADDC_U32 $sgpr32, 68, implicit-def $scc, implicit $scc + ; GFX1200-NEXT: S_BITCMP1_B32 $sgpr72, 0, implicit-def $scc + ; GFX1200-NEXT: $sgpr72 = S_BITSET0_B32 0, $sgpr72 + ; GFX1200-NEXT: renamable $sgpr4 = S_MOV_B32 killed $sgpr72 ; GFX1200-NEXT: S_NOP 0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 ; GFX1200-NEXT: S_NOP 0, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 ; GFX1200-NEXT: S_NOP 0, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 diff --git a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll index ff2fb986e7828..dc20ae3765069 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll @@ -2060,9 +2060,9 @@ define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %p ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s29, s33 ; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_or_saveexec_b64 s[38:39], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[40:41], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[38:39] +; GFX9-NEXT: s_mov_b64 exec, s[40:41] ; GFX9-NEXT: v_mov_b32_e32 v2, s28 ; GFX9-NEXT: global_store_dword v[0:1], v2, off offset:48 ; GFX9-NEXT: v_mov_b32_e32 v5, s27 diff --git a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll index 3e84aa37fbcaa..512d58d3f996d 100644 --- a/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/function-resource-usage.ll @@ -202,18 +202,18 @@ define void @indirect_use_50_vgpr() #0 { } ; GCN-LABEL: {{^}}use_80_sgpr: -; GCN: .set use_80_sgpr.num_vgpr, 1 +; GCN: .set use_80_sgpr.num_vgpr, 0 ; GCN: .set use_80_sgpr.num_agpr, 0 ; GCN: .set use_80_sgpr.numbered_sgpr, 80 -; GCN: .set use_80_sgpr.private_seg_size, 8 +; GCN: .set use_80_sgpr.private_seg_size, 0 ; GCN: .set use_80_sgpr.uses_vcc, 0 ; GCN: .set use_80_sgpr.uses_flat_scratch, 0 ; GCN: .set use_80_sgpr.has_dyn_sized_stack, 0 ; GCN: .set use_80_sgpr.has_recursion, 0 ; GCN: .set use_80_sgpr.has_indirect_call, 0 ; GCN: TotalNumSgprs: 84 -; GCN: NumVgprs: 1 -; GCN: ScratchSize: 8 +; GCN: NumVgprs: 0 +; GCN: ScratchSize: 0 define void @use_80_sgpr() #1 { call void asm sideeffect "", "~{s79}"() #0 ret void @@ -231,7 +231,7 @@ define void @use_80_sgpr() #1 { ; GCN: .set indirect_use_80_sgpr.has_indirect_call, or(0, use_80_sgpr.has_indirect_call) ; GCN: TotalNumSgprs: 84 ; GCN: NumVgprs: 41 -; GCN: ScratchSize: 24 +; GCN: ScratchSize: 16 define void @indirect_use_80_sgpr() #1 { call void @use_80_sgpr() ret void @@ -249,7 +249,7 @@ define void @indirect_use_80_sgpr() #1 { ; GCN: .set indirect_2_level_use_80_sgpr.has_indirect_call, or(0, indirect_use_80_sgpr.has_indirect_call) ; GCN: TotalNumSgprs: 86 ; GCN: NumVgprs: 41 -; GCN: ScratchSize: 24 +; GCN: ScratchSize: 16 define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 { call void @indirect_use_80_sgpr() ret void diff --git a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll index be12d4be59106..8ac187eacf1fe 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll @@ -41,43 +41,43 @@ define amdgpu_gfx void @gfx_func() { ; SDAG-NEXT: v_writelane_b32 v40, s29, 25 ; SDAG-NEXT: v_writelane_b32 v40, s30, 26 ; SDAG-NEXT: v_writelane_b32 v40, s31, 27 -; SDAG-NEXT: v_writelane_b32 v40, s70, 28 -; SDAG-NEXT: v_writelane_b32 v40, s71, 29 -; SDAG-NEXT: v_writelane_b32 v40, s72, 30 -; SDAG-NEXT: v_writelane_b32 v40, s73, 31 -; SDAG-NEXT: v_writelane_b32 v40, s74, 32 -; SDAG-NEXT: v_writelane_b32 v40, s75, 33 -; SDAG-NEXT: v_writelane_b32 v40, s76, 34 -; SDAG-NEXT: v_writelane_b32 v40, s77, 35 -; SDAG-NEXT: v_writelane_b32 v40, s86, 36 -; SDAG-NEXT: v_writelane_b32 v40, s87, 37 -; SDAG-NEXT: v_writelane_b32 v40, s88, 38 -; SDAG-NEXT: v_writelane_b32 v40, s89, 39 -; SDAG-NEXT: v_writelane_b32 v40, s90, 40 -; SDAG-NEXT: v_writelane_b32 v40, s91, 41 -; SDAG-NEXT: v_writelane_b32 v40, s92, 42 +; SDAG-NEXT: v_writelane_b32 v40, s72, 28 +; SDAG-NEXT: v_writelane_b32 v40, s73, 29 +; SDAG-NEXT: v_writelane_b32 v40, s74, 30 +; SDAG-NEXT: v_writelane_b32 v40, s75, 31 +; SDAG-NEXT: v_writelane_b32 v40, s76, 32 +; SDAG-NEXT: v_writelane_b32 v40, s77, 33 +; SDAG-NEXT: v_writelane_b32 v40, s78, 34 +; SDAG-NEXT: v_writelane_b32 v40, s79, 35 +; SDAG-NEXT: v_writelane_b32 v40, s88, 36 +; SDAG-NEXT: v_writelane_b32 v40, s89, 37 +; SDAG-NEXT: v_writelane_b32 v40, s90, 38 +; SDAG-NEXT: v_writelane_b32 v40, s91, 39 +; SDAG-NEXT: v_writelane_b32 v40, s92, 40 +; SDAG-NEXT: v_writelane_b32 v40, s93, 41 +; SDAG-NEXT: v_writelane_b32 v40, s94, 42 ; SDAG-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi ; SDAG-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo ; SDAG-NEXT: s_mov_b64 s[8:9], 0 ; SDAG-NEXT: s_addk_i32 s32, 0x400 -; SDAG-NEXT: v_writelane_b32 v40, s93, 43 +; SDAG-NEXT: v_writelane_b32 v40, s95, 43 ; SDAG-NEXT: s_swappc_b64 s[30:31], s[34:35] -; SDAG-NEXT: v_readlane_b32 s93, v40, 43 -; SDAG-NEXT: v_readlane_b32 s92, v40, 42 -; SDAG-NEXT: v_readlane_b32 s91, v40, 41 -; SDAG-NEXT: v_readlane_b32 s90, v40, 40 -; SDAG-NEXT: v_readlane_b32 s89, v40, 39 -; SDAG-NEXT: v_readlane_b32 s88, v40, 38 -; SDAG-NEXT: v_readlane_b32 s87, v40, 37 -; SDAG-NEXT: v_readlane_b32 s86, v40, 36 -; SDAG-NEXT: v_readlane_b32 s77, v40, 35 -; SDAG-NEXT: v_readlane_b32 s76, v40, 34 -; SDAG-NEXT: v_readlane_b32 s75, v40, 33 -; SDAG-NEXT: v_readlane_b32 s74, v40, 32 -; SDAG-NEXT: v_readlane_b32 s73, v40, 31 -; SDAG-NEXT: v_readlane_b32 s72, v40, 30 -; SDAG-NEXT: v_readlane_b32 s71, v40, 29 -; SDAG-NEXT: v_readlane_b32 s70, v40, 28 +; SDAG-NEXT: v_readlane_b32 s95, v40, 43 +; SDAG-NEXT: v_readlane_b32 s94, v40, 42 +; SDAG-NEXT: v_readlane_b32 s93, v40, 41 +; SDAG-NEXT: v_readlane_b32 s92, v40, 40 +; SDAG-NEXT: v_readlane_b32 s91, v40, 39 +; SDAG-NEXT: v_readlane_b32 s90, v40, 38 +; SDAG-NEXT: v_readlane_b32 s89, v40, 37 +; SDAG-NEXT: v_readlane_b32 s88, v40, 36 +; SDAG-NEXT: v_readlane_b32 s79, v40, 35 +; SDAG-NEXT: v_readlane_b32 s78, v40, 34 +; SDAG-NEXT: v_readlane_b32 s77, v40, 33 +; SDAG-NEXT: v_readlane_b32 s76, v40, 32 +; SDAG-NEXT: v_readlane_b32 s75, v40, 31 +; SDAG-NEXT: v_readlane_b32 s74, v40, 30 +; SDAG-NEXT: v_readlane_b32 s73, v40, 29 +; SDAG-NEXT: v_readlane_b32 s72, v40, 28 ; SDAG-NEXT: v_readlane_b32 s31, v40, 27 ; SDAG-NEXT: v_readlane_b32 s30, v40, 26 ; SDAG-NEXT: v_readlane_b32 s29, v40, 25 @@ -150,43 +150,43 @@ define amdgpu_gfx void @gfx_func() { ; GISEL-NEXT: v_writelane_b32 v40, s29, 25 ; GISEL-NEXT: v_writelane_b32 v40, s30, 26 ; GISEL-NEXT: v_writelane_b32 v40, s31, 27 -; GISEL-NEXT: v_writelane_b32 v40, s70, 28 -; GISEL-NEXT: v_writelane_b32 v40, s71, 29 -; GISEL-NEXT: v_writelane_b32 v40, s72, 30 -; GISEL-NEXT: v_writelane_b32 v40, s73, 31 -; GISEL-NEXT: v_writelane_b32 v40, s74, 32 -; GISEL-NEXT: v_writelane_b32 v40, s75, 33 -; GISEL-NEXT: v_writelane_b32 v40, s76, 34 -; GISEL-NEXT: v_writelane_b32 v40, s77, 35 -; GISEL-NEXT: v_writelane_b32 v40, s86, 36 -; GISEL-NEXT: v_writelane_b32 v40, s87, 37 -; GISEL-NEXT: v_writelane_b32 v40, s88, 38 -; GISEL-NEXT: v_writelane_b32 v40, s89, 39 -; GISEL-NEXT: v_writelane_b32 v40, s90, 40 -; GISEL-NEXT: v_writelane_b32 v40, s91, 41 -; GISEL-NEXT: v_writelane_b32 v40, s92, 42 +; GISEL-NEXT: v_writelane_b32 v40, s72, 28 +; GISEL-NEXT: v_writelane_b32 v40, s73, 29 +; GISEL-NEXT: v_writelane_b32 v40, s74, 30 +; GISEL-NEXT: v_writelane_b32 v40, s75, 31 +; GISEL-NEXT: v_writelane_b32 v40, s76, 32 +; GISEL-NEXT: v_writelane_b32 v40, s77, 33 +; GISEL-NEXT: v_writelane_b32 v40, s78, 34 +; GISEL-NEXT: v_writelane_b32 v40, s79, 35 +; GISEL-NEXT: v_writelane_b32 v40, s88, 36 +; GISEL-NEXT: v_writelane_b32 v40, s89, 37 +; GISEL-NEXT: v_writelane_b32 v40, s90, 38 +; GISEL-NEXT: v_writelane_b32 v40, s91, 39 +; GISEL-NEXT: v_writelane_b32 v40, s92, 40 +; GISEL-NEXT: v_writelane_b32 v40, s93, 41 +; GISEL-NEXT: v_writelane_b32 v40, s94, 42 ; GISEL-NEXT: s_mov_b32 s34, extern_c_func@abs32@lo ; GISEL-NEXT: s_mov_b32 s35, extern_c_func@abs32@hi ; GISEL-NEXT: s_mov_b64 s[8:9], 0 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s93, 43 +; GISEL-NEXT: v_writelane_b32 v40, s95, 43 ; GISEL-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GISEL-NEXT: v_readlane_b32 s93, v40, 43 -; GISEL-NEXT: v_readlane_b32 s92, v40, 42 -; GISEL-NEXT: v_readlane_b32 s91, v40, 41 -; GISEL-NEXT: v_readlane_b32 s90, v40, 40 -; GISEL-NEXT: v_readlane_b32 s89, v40, 39 -; GISEL-NEXT: v_readlane_b32 s88, v40, 38 -; GISEL-NEXT: v_readlane_b32 s87, v40, 37 -; GISEL-NEXT: v_readlane_b32 s86, v40, 36 -; GISEL-NEXT: v_readlane_b32 s77, v40, 35 -; GISEL-NEXT: v_readlane_b32 s76, v40, 34 -; GISEL-NEXT: v_readlane_b32 s75, v40, 33 -; GISEL-NEXT: v_readlane_b32 s74, v40, 32 -; GISEL-NEXT: v_readlane_b32 s73, v40, 31 -; GISEL-NEXT: v_readlane_b32 s72, v40, 30 -; GISEL-NEXT: v_readlane_b32 s71, v40, 29 -; GISEL-NEXT: v_readlane_b32 s70, v40, 28 +; GISEL-NEXT: v_readlane_b32 s95, v40, 43 +; GISEL-NEXT: v_readlane_b32 s94, v40, 42 +; GISEL-NEXT: v_readlane_b32 s93, v40, 41 +; GISEL-NEXT: v_readlane_b32 s92, v40, 40 +; GISEL-NEXT: v_readlane_b32 s91, v40, 39 +; GISEL-NEXT: v_readlane_b32 s90, v40, 38 +; GISEL-NEXT: v_readlane_b32 s89, v40, 37 +; GISEL-NEXT: v_readlane_b32 s88, v40, 36 +; GISEL-NEXT: v_readlane_b32 s79, v40, 35 +; GISEL-NEXT: v_readlane_b32 s78, v40, 34 +; GISEL-NEXT: v_readlane_b32 s77, v40, 33 +; GISEL-NEXT: v_readlane_b32 s76, v40, 32 +; GISEL-NEXT: v_readlane_b32 s75, v40, 31 +; GISEL-NEXT: v_readlane_b32 s74, v40, 30 +; GISEL-NEXT: v_readlane_b32 s73, v40, 29 +; GISEL-NEXT: v_readlane_b32 s72, v40, 28 ; GISEL-NEXT: v_readlane_b32 s31, v40, 27 ; GISEL-NEXT: v_readlane_b32 s30, v40, 26 ; GISEL-NEXT: v_readlane_b32 s29, v40, 25 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll index ef230e4b877b0..2322b29abaa10 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -9091,8 +9091,8 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s35, 3 ; GFX9-NEXT: v_writelane_b32 v40, s36, 4 ; GFX9-NEXT: v_writelane_b32 v40, s37, 5 -; GFX9-NEXT: v_writelane_b32 v40, s46, 6 -; GFX9-NEXT: v_writelane_b32 v40, s47, 7 +; GFX9-NEXT: v_writelane_b32 v40, s38, 6 +; GFX9-NEXT: v_writelane_b32 v40, s39, 7 ; GFX9-NEXT: v_writelane_b32 v40, s48, 8 ; GFX9-NEXT: v_writelane_b32 v40, s49, 9 ; GFX9-NEXT: v_writelane_b32 v40, s50, 10 @@ -9100,25 +9100,25 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s52, 12 ; GFX9-NEXT: v_writelane_b32 v40, s53, 13 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s62, 14 +; GFX9-NEXT: v_writelane_b32 v40, s54, 14 ; GFX9-NEXT: s_mov_b32 s5, byval_align16_f64_arg@abs32@hi ; GFX9-NEXT: s_mov_b32 s4, byval_align16_f64_arg@abs32@lo -; GFX9-NEXT: v_writelane_b32 v40, s63, 15 +; GFX9-NEXT: v_writelane_b32 v40, s55, 15 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s63, v40, 15 -; GFX9-NEXT: v_readlane_b32 s62, v40, 14 +; GFX9-NEXT: v_readlane_b32 s55, v40, 15 +; GFX9-NEXT: v_readlane_b32 s54, v40, 14 ; GFX9-NEXT: v_readlane_b32 s53, v40, 13 ; GFX9-NEXT: v_readlane_b32 s52, v40, 12 ; GFX9-NEXT: v_readlane_b32 s51, v40, 11 ; GFX9-NEXT: v_readlane_b32 s50, v40, 10 ; GFX9-NEXT: v_readlane_b32 s49, v40, 9 ; GFX9-NEXT: v_readlane_b32 s48, v40, 8 -; GFX9-NEXT: v_readlane_b32 s47, v40, 7 -; GFX9-NEXT: v_readlane_b32 s46, v40, 6 +; GFX9-NEXT: v_readlane_b32 s39, v40, 7 +; GFX9-NEXT: v_readlane_b32 s38, v40, 6 ; GFX9-NEXT: v_readlane_b32 s37, v40, 5 ; GFX9-NEXT: v_readlane_b32 s36, v40, 4 ; GFX9-NEXT: v_readlane_b32 s35, v40, 3 @@ -9159,27 +9159,27 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s35, 3 ; GFX10-NEXT: v_writelane_b32 v40, s36, 4 ; GFX10-NEXT: v_writelane_b32 v40, s37, 5 -; GFX10-NEXT: v_writelane_b32 v40, s46, 6 -; GFX10-NEXT: v_writelane_b32 v40, s47, 7 +; GFX10-NEXT: v_writelane_b32 v40, s38, 6 +; GFX10-NEXT: v_writelane_b32 v40, s39, 7 ; GFX10-NEXT: v_writelane_b32 v40, s48, 8 ; GFX10-NEXT: v_writelane_b32 v40, s49, 9 ; GFX10-NEXT: v_writelane_b32 v40, s50, 10 ; GFX10-NEXT: v_writelane_b32 v40, s51, 11 ; GFX10-NEXT: v_writelane_b32 v40, s52, 12 ; GFX10-NEXT: v_writelane_b32 v40, s53, 13 -; GFX10-NEXT: v_writelane_b32 v40, s62, 14 -; GFX10-NEXT: v_writelane_b32 v40, s63, 15 +; GFX10-NEXT: v_writelane_b32 v40, s54, 14 +; GFX10-NEXT: v_writelane_b32 v40, s55, 15 ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s63, v40, 15 -; GFX10-NEXT: v_readlane_b32 s62, v40, 14 +; GFX10-NEXT: v_readlane_b32 s55, v40, 15 +; GFX10-NEXT: v_readlane_b32 s54, v40, 14 ; GFX10-NEXT: v_readlane_b32 s53, v40, 13 ; GFX10-NEXT: v_readlane_b32 s52, v40, 12 ; GFX10-NEXT: v_readlane_b32 s51, v40, 11 ; GFX10-NEXT: v_readlane_b32 s50, v40, 10 ; GFX10-NEXT: v_readlane_b32 s49, v40, 9 ; GFX10-NEXT: v_readlane_b32 s48, v40, 8 -; GFX10-NEXT: v_readlane_b32 s47, v40, 7 -; GFX10-NEXT: v_readlane_b32 s46, v40, 6 +; GFX10-NEXT: v_readlane_b32 s39, v40, 7 +; GFX10-NEXT: v_readlane_b32 s38, v40, 6 ; GFX10-NEXT: v_readlane_b32 s37, v40, 5 ; GFX10-NEXT: v_readlane_b32 s36, v40, 4 ; GFX10-NEXT: v_readlane_b32 s35, v40, 3 @@ -9215,29 +9215,29 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX11-NEXT: v_writelane_b32 v40, s35, 3 ; GFX11-NEXT: v_writelane_b32 v40, s36, 4 ; GFX11-NEXT: v_writelane_b32 v40, s37, 5 -; GFX11-NEXT: v_writelane_b32 v40, s46, 6 -; GFX11-NEXT: v_writelane_b32 v40, s47, 7 +; GFX11-NEXT: v_writelane_b32 v40, s38, 6 +; GFX11-NEXT: v_writelane_b32 v40, s39, 7 ; GFX11-NEXT: v_writelane_b32 v40, s48, 8 ; GFX11-NEXT: v_writelane_b32 v40, s49, 9 ; GFX11-NEXT: v_writelane_b32 v40, s50, 10 ; GFX11-NEXT: v_writelane_b32 v40, s51, 11 ; GFX11-NEXT: v_writelane_b32 v40, s52, 12 ; GFX11-NEXT: v_writelane_b32 v40, s53, 13 -; GFX11-NEXT: v_writelane_b32 v40, s62, 14 -; GFX11-NEXT: v_writelane_b32 v40, s63, 15 +; GFX11-NEXT: v_writelane_b32 v40, s54, 14 +; GFX11-NEXT: v_writelane_b32 v40, s55, 15 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: scratch_store_b64 off, v[32:33], s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX11-NEXT: v_readlane_b32 s63, v40, 15 -; GFX11-NEXT: v_readlane_b32 s62, v40, 14 +; GFX11-NEXT: v_readlane_b32 s55, v40, 15 +; GFX11-NEXT: v_readlane_b32 s54, v40, 14 ; GFX11-NEXT: v_readlane_b32 s53, v40, 13 ; GFX11-NEXT: v_readlane_b32 s52, v40, 12 ; GFX11-NEXT: v_readlane_b32 s51, v40, 11 ; GFX11-NEXT: v_readlane_b32 s50, v40, 10 ; GFX11-NEXT: v_readlane_b32 s49, v40, 9 ; GFX11-NEXT: v_readlane_b32 s48, v40, 8 -; GFX11-NEXT: v_readlane_b32 s47, v40, 7 -; GFX11-NEXT: v_readlane_b32 s46, v40, 6 +; GFX11-NEXT: v_readlane_b32 s39, v40, 7 +; GFX11-NEXT: v_readlane_b32 s38, v40, 6 ; GFX11-NEXT: v_readlane_b32 s37, v40, 5 ; GFX11-NEXT: v_readlane_b32 s36, v40, 4 ; GFX11-NEXT: v_readlane_b32 s35, v40, 3 @@ -9273,29 +9273,29 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 7 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 9 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 10 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 11 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 13 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 14 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 14 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 15 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 15 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 14 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 15 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 14 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 13 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 12 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 11 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 10 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 8 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 7 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 3 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll index 3c85914536f28..4f5c46d5f424f 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll @@ -365,12 +365,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -388,8 +388,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -437,12 +437,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -458,9 +458,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -504,13 +504,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -526,8 +526,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -571,13 +571,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -593,8 +593,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -735,19 +735,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -760,11 +760,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -773,7 +773,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -785,12 +785,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -806,9 +806,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -867,13 +867,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -889,8 +889,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -944,13 +944,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -966,8 +966,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1585,12 +1585,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -1608,8 +1608,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -1657,12 +1657,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; ; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -1678,9 +1678,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1724,13 +1724,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -1746,8 +1746,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1791,13 +1791,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -1813,8 +1813,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1955,19 +1955,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -1980,11 +1980,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -1993,7 +1993,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -2005,12 +2005,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; ; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -2026,9 +2026,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2087,13 +2087,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -2109,8 +2109,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2164,13 +2164,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -2186,8 +2186,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_one_as_scope ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2865,12 +2865,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -2888,8 +2888,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -2937,12 +2937,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -2958,9 +2958,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3004,13 +3004,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -3026,8 +3026,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3071,13 +3071,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -3093,8 +3093,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3235,19 +3235,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -3260,11 +3260,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -3273,7 +3273,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -3285,12 +3285,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -3306,9 +3306,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3367,13 +3367,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -3389,8 +3389,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3444,13 +3444,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -3466,8 +3466,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3641,12 +3641,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -3664,8 +3664,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -3713,12 +3713,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -3734,9 +3734,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3780,13 +3780,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -3802,8 +3802,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3847,13 +3847,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -3869,8 +3869,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4011,19 +4011,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -4036,11 +4036,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -4049,7 +4049,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -4061,12 +4061,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -4082,9 +4082,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4143,13 +4143,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -4165,8 +4165,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4220,13 +4220,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -4242,8 +4242,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4920,12 +4920,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX7LESS-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -4943,8 +4943,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -4992,12 +4992,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; ; GFX9-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -5013,9 +5013,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5059,13 +5059,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; ; GFX1064-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -5081,8 +5081,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5126,13 +5126,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; ; GFX1032-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -5148,8 +5148,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5316,19 +5316,19 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX7LESS-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -5341,11 +5341,11 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB8_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -5354,7 +5354,7 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -5366,12 +5366,12 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; ; GFX9-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -5387,9 +5387,9 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5448,13 +5448,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; ; GFX1064-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -5470,8 +5470,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5525,13 +5525,13 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; ; GFX1032-DPP-LABEL: global_atomic_fadd_uni_address_div_value_default_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -5547,8 +5547,8 @@ define amdgpu_kernel void @global_atomic_fadd_uni_address_div_value_default_scop ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5753,7 +5753,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 ; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 ; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec ; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0 @@ -5769,7 +5769,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[0:1] -; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 @@ -5803,7 +5803,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 @@ -5820,8 +5820,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB9_2 ; GFX7LESS-NEXT: .LBB9_3: ; GFX7LESS-NEXT: s_endpgm @@ -5838,7 +5838,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX9-NEXT: s_add_u32 s64, s64, s11 ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3 ; GFX9-NEXT: s_addc_u32 s65, s65, 0 -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -5859,7 +5859,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, s1 -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: .LBB9_2: ; %atomicrmw.start @@ -5878,7 +5878,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 ; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -5899,8 +5899,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB9_2 ; GFX9-NEXT: .LBB9_3: ; GFX9-NEXT: s_endpgm @@ -5930,8 +5930,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 @@ -5962,7 +5962,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -5979,8 +5979,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1064-NEXT: .LBB9_3: ; GFX1064-NEXT: s_endpgm @@ -5998,7 +5998,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_addc_u32 s65, s65, 0 ; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB9_3 @@ -6010,7 +6010,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 @@ -6041,7 +6041,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -6058,8 +6058,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1032-NEXT: .LBB9_3: ; GFX1032-NEXT: s_endpgm @@ -6085,8 +6085,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6116,7 +6116,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -6130,8 +6130,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1164-NEXT: .LBB9_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6143,7 +6143,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 ; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -6157,7 +6157,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6183,7 +6183,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -6196,8 +6196,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1132-NEXT: .LBB9_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6212,7 +6212,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 ; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], exec ; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0 @@ -6228,7 +6228,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s2, s[0:1] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 @@ -6262,7 +6262,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 @@ -6279,8 +6279,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX7LESS-DPP-NEXT: .LBB9_3: ; GFX7LESS-DPP-NEXT: s_endpgm @@ -6297,7 +6297,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3 ; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -6318,7 +6318,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: .LBB9_2: ; %atomicrmw.start @@ -6337,7 +6337,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 ; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 @@ -6358,8 +6358,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX9-DPP-NEXT: .LBB9_3: ; GFX9-DPP-NEXT: s_endpgm @@ -6389,8 +6389,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 @@ -6421,7 +6421,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -6438,8 +6438,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1064-DPP-NEXT: .LBB9_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -6457,7 +6457,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3 @@ -6469,7 +6469,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 @@ -6500,7 +6500,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -6517,8 +6517,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1032-DPP-NEXT: .LBB9_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -6544,8 +6544,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6575,7 +6575,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -6589,8 +6589,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1164-DPP-NEXT: .LBB9_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6602,7 +6602,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -6616,7 +6616,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6642,7 +6642,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -6655,8 +6655,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1132-DPP-NEXT: .LBB9_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6669,18 +6669,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 @@ -6699,8 +6699,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -6725,21 +6725,21 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB10_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 -; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: .LBB10_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -6753,25 +6753,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_4 ; GFX7LESS-NEXT: .LBB10_5: ; GFX7LESS-NEXT: s_endpgm @@ -6798,7 +6798,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -6835,7 +6835,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX9-NEXT: .LBB10_4: ; %atomicrmw.start @@ -6854,7 +6854,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 ; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -6875,8 +6875,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB10_4 ; GFX9-NEXT: .LBB10_5: ; GFX9-NEXT: s_endpgm @@ -6903,7 +6903,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -6940,7 +6940,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-NEXT: .LBB10_4: ; %atomicrmw.start @@ -6964,7 +6964,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -6981,8 +6981,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1064-NEXT: .LBB10_5: ; GFX1064-NEXT: s_endpgm @@ -7009,7 +7009,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7038,7 +7038,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: s_cbranch_scc1 .LBB10_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 @@ -7069,7 +7069,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -7086,8 +7086,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1032-NEXT: .LBB10_5: ; GFX1032-NEXT: s_endpgm @@ -7115,7 +7115,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v41, 0 @@ -7145,7 +7145,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 @@ -7169,7 +7169,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -7183,8 +7183,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1164-NEXT: .LBB10_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7213,7 +7213,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v41, 0 @@ -7233,7 +7233,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-NEXT: s_cbranch_scc1 .LBB10_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -7262,7 +7262,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -7275,8 +7275,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1132-NEXT: .LBB10_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7285,22 +7285,22 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -7318,23 +7318,23 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1 -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[40:41] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -7348,37 +7348,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s82, -1 -; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s70, -1 +; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11 +; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 @@ -7393,17 +7393,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7457,10 +7457,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55] ; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) @@ -7471,31 +7471,31 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] @@ -7526,7 +7526,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7582,7 +7582,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1064-DPP-NEXT: ; %bb.1: ; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start @@ -7606,7 +7606,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -7623,8 +7623,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1064-DPP-NEXT: .LBB10_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -7651,7 +7651,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7696,7 +7696,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3 @@ -7725,7 +7725,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -7742,8 +7742,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1032-DPP-NEXT: .LBB10_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -7771,7 +7771,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -7826,7 +7826,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1164-DPP-NEXT: ; %bb.1: ; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -7850,7 +7850,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -7864,8 +7864,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1164-DPP-NEXT: .LBB10_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7894,7 +7894,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -7936,7 +7936,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3 @@ -7962,7 +7962,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -7975,8 +7975,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1132-DPP-NEXT: .LBB10_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8508,12 +8508,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -8531,8 +8531,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -8585,12 +8585,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; ; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -8606,9 +8606,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8655,13 +8655,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; ; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -8677,8 +8677,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8725,13 +8725,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -8747,8 +8747,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8922,19 +8922,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -8947,11 +8947,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB12_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -8962,7 +8962,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -8975,12 +8975,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; ; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -8996,9 +8996,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9074,13 +9074,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; ; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -9096,8 +9096,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9163,13 +9163,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -9185,8 +9185,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9941,12 +9941,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -9964,8 +9964,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -10018,12 +10018,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -10039,9 +10039,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10088,13 +10088,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -10110,8 +10110,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10158,13 +10158,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -10180,8 +10180,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10355,19 +10355,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -10380,11 +10380,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB14_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -10395,7 +10395,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -10408,12 +10408,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -10429,9 +10429,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10507,13 +10507,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -10529,8 +10529,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10596,13 +10596,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -10618,8 +10618,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10856,12 +10856,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -10879,8 +10879,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -10933,12 +10933,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX9-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -10954,9 +10954,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11003,13 +11003,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1064-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -11025,8 +11025,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11073,13 +11073,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1032-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -11095,8 +11095,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11270,19 +11270,19 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -11295,11 +11295,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB15_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -11310,7 +11310,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -11323,12 +11323,12 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -11344,9 +11344,9 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11422,13 +11422,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1064-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -11444,8 +11444,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11511,13 +11511,13 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -11533,8 +11533,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11795,11 +11795,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 -; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 @@ -11829,7 +11829,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 @@ -11846,8 +11846,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB16_2 ; GFX7LESS-NEXT: .LBB16_3: ; GFX7LESS-NEXT: s_endpgm @@ -11884,10 +11884,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-NEXT: .LBB16_2: ; %atomicrmw.start @@ -11906,7 +11906,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 ; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -11927,8 +11927,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB16_2 ; GFX9-NEXT: .LBB16_3: ; GFX9-NEXT: s_endpgm @@ -11962,8 +11962,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) @@ -11990,7 +11990,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -12007,8 +12007,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1064-NEXT: .LBB16_3: ; GFX1064-NEXT: s_endpgm @@ -12024,7 +12024,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-NEXT: s_bcnt1_i32_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s1, 0x43300000 -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1] ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4] @@ -12042,7 +12042,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) @@ -12069,7 +12069,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -12086,8 +12086,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1032-NEXT: .LBB16_3: ; GFX1032-NEXT: s_endpgm @@ -12121,8 +12121,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -12150,7 +12150,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -12164,8 +12164,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1164-NEXT: .LBB16_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12178,7 +12178,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0 ; GFX1132-NEXT: v_mov_b32_e32 v0, 0x43300000 -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_clause 0x1 ; GFX1132-NEXT: scratch_store_b32 off, v0, off offset:20 ; GFX1132-NEXT: scratch_store_b32 off, v1, off offset:16 @@ -12199,7 +12199,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -12223,7 +12223,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -12236,8 +12236,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1132-NEXT: .LBB16_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12270,11 +12270,11 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 @@ -12304,7 +12304,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 @@ -12321,8 +12321,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX7LESS-DPP-NEXT: .LBB16_3: ; GFX7LESS-DPP-NEXT: s_endpgm @@ -12359,10 +12359,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-DPP-NEXT: .LBB16_2: ; %atomicrmw.start @@ -12381,7 +12381,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 ; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 @@ -12402,8 +12402,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX9-DPP-NEXT: .LBB16_3: ; GFX9-DPP-NEXT: s_endpgm @@ -12437,8 +12437,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -12465,7 +12465,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -12482,8 +12482,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1064-DPP-NEXT: .LBB16_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -12499,7 +12499,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s1, 0x43300000 -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1] ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4] @@ -12517,7 +12517,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -12544,7 +12544,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -12561,8 +12561,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1032-DPP-NEXT: .LBB16_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -12596,8 +12596,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -12625,7 +12625,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -12639,8 +12639,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1164-DPP-NEXT: .LBB16_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12653,7 +12653,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000 -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_clause 0x1 ; GFX1132-DPP-NEXT: scratch_store_b32 off, v0, off offset:20 ; GFX1132-DPP-NEXT: scratch_store_b32 off, v1, off offset:16 @@ -12674,7 +12674,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -12698,7 +12698,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -12711,8 +12711,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1132-DPP-NEXT: .LBB16_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12725,18 +12725,18 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 @@ -12755,8 +12755,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -12781,21 +12781,21 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB17_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 -; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: .LBB17_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], v[41:42] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -12809,25 +12809,25 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB17_4 ; GFX7LESS-NEXT: .LBB17_5: ; GFX7LESS-NEXT: s_endpgm @@ -12854,7 +12854,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -12891,7 +12891,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX9-NEXT: .LBB17_4: ; %atomicrmw.start @@ -12910,7 +12910,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 ; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -12931,8 +12931,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB17_4 ; GFX9-NEXT: .LBB17_5: ; GFX9-NEXT: s_endpgm @@ -12959,7 +12959,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -12996,7 +12996,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-NEXT: .LBB17_4: ; %atomicrmw.start @@ -13020,7 +13020,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -13037,8 +13037,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1064-NEXT: .LBB17_5: ; GFX1064-NEXT: s_endpgm @@ -13065,7 +13065,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -13094,7 +13094,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-NEXT: s_cbranch_scc1 .LBB17_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 @@ -13125,7 +13125,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -13142,8 +13142,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1032-NEXT: .LBB17_5: ; GFX1032-NEXT: s_endpgm @@ -13171,7 +13171,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v41, 0 @@ -13201,7 +13201,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 @@ -13225,7 +13225,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -13239,8 +13239,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1164-NEXT: .LBB17_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -13269,7 +13269,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v41, 0 @@ -13289,7 +13289,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-NEXT: s_cbranch_scc1 .LBB17_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -13318,7 +13318,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -13331,8 +13331,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1132-NEXT: .LBB17_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -13341,22 +13341,22 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -13374,23 +13374,23 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1 -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: .LBB17_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], v[40:41] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -13404,37 +13404,37 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB17_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fadd_double_uni_address_div_value_default_scope_strictfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s82, -1 -; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s70, -1 +; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11 +; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 @@ -13449,17 +13449,17 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -13513,10 +13513,10 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55] ; GFX9-DPP-NEXT: .LBB17_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) @@ -13527,31 +13527,31 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] @@ -13582,7 +13582,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -13638,7 +13638,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX1064-DPP-NEXT: ; %bb.1: ; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB17_2: ; %atomicrmw.start @@ -13662,7 +13662,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -13679,8 +13679,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1064-DPP-NEXT: .LBB17_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -13707,7 +13707,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -13752,7 +13752,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB17_3 @@ -13781,7 +13781,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -13798,8 +13798,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1032-DPP-NEXT: .LBB17_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -13827,7 +13827,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -13882,7 +13882,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX1164-DPP-NEXT: ; %bb.1: ; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -13906,7 +13906,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -13920,8 +13920,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1164-DPP-NEXT: .LBB17_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -13950,7 +13950,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -13992,7 +13992,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3 @@ -14018,7 +14018,7 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -14031,8 +14031,8 @@ define amdgpu_kernel void @global_atomic_fadd_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1132-DPP-NEXT: .LBB17_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll index cc9c310e5c059..e1ba4a2b0bf2a 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll @@ -273,12 +273,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -296,8 +296,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -349,12 +349,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -370,9 +370,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -420,13 +420,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -442,8 +442,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -477,13 +477,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -499,8 +499,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -635,19 +635,19 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -660,11 +660,11 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 ; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start @@ -675,7 +675,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -687,12 +687,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -708,9 +708,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -778,13 +778,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -800,8 +800,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -851,13 +851,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -873,8 +873,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1312,12 +1312,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -1335,8 +1335,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -1388,12 +1388,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; ; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -1409,9 +1409,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1459,13 +1459,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; ; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -1481,8 +1481,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1516,13 +1516,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; ; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -1538,8 +1538,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1674,19 +1674,19 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -1699,11 +1699,11 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 ; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start @@ -1714,7 +1714,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -1726,12 +1726,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; ; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -1747,9 +1747,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1817,13 +1817,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; ; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -1839,8 +1839,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1890,13 +1890,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; ; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_one_as_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -1912,8 +1912,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_one_as_scope ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2351,12 +2351,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX7LESS-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -2374,8 +2374,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -2427,12 +2427,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; ; GFX9-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -2448,9 +2448,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2498,13 +2498,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; ; GFX1064-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -2520,8 +2520,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2555,13 +2555,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; ; GFX1032-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -2577,8 +2577,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2713,19 +2713,19 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX7LESS-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -2738,11 +2738,11 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 ; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start @@ -2753,7 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -2765,12 +2765,12 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; ; GFX9-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -2786,9 +2786,9 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2856,13 +2856,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; ; GFX1064-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -2878,8 +2878,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2929,13 +2929,13 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; ; GFX1032-DPP-LABEL: global_atomic_fmax_uni_address_div_value_default_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -2951,8 +2951,8 @@ define amdgpu_kernel void @global_atomic_fmax_uni_address_div_value_default_scop ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3153,11 +3153,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 -; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 @@ -3189,7 +3189,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 @@ -3205,8 +3205,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB6_2 ; GFX7LESS-NEXT: .LBB6_3: ; GFX7LESS-NEXT: s_endpgm @@ -3237,10 +3237,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-NEXT: .LBB6_2: ; %atomicrmw.start @@ -3258,7 +3258,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -3281,8 +3281,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB6_2 ; GFX9-NEXT: .LBB6_3: ; GFX9-NEXT: s_endpgm @@ -3312,8 +3312,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) @@ -3340,7 +3340,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -3358,8 +3358,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1064-NEXT: .LBB6_3: ; GFX1064-NEXT: s_endpgm @@ -3375,7 +3375,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: s_addc_u32 s65, s65, 0 ; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB6_3 @@ -3389,7 +3389,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) @@ -3416,7 +3416,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -3434,8 +3434,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1032-NEXT: .LBB6_3: ; GFX1032-NEXT: s_endpgm @@ -3458,8 +3458,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -3484,7 +3484,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -3502,8 +3502,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1164-NEXT: .LBB6_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -3514,7 +3514,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -3526,7 +3526,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -3548,7 +3548,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -3565,8 +3565,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1132-NEXT: .LBB6_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -3593,11 +3593,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 @@ -3629,7 +3629,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 @@ -3645,8 +3645,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX7LESS-DPP-NEXT: .LBB6_3: ; GFX7LESS-DPP-NEXT: s_endpgm @@ -3677,10 +3677,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-DPP-NEXT: .LBB6_2: ; %atomicrmw.start @@ -3698,7 +3698,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 @@ -3721,8 +3721,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX9-DPP-NEXT: .LBB6_3: ; GFX9-DPP-NEXT: s_endpgm @@ -3752,8 +3752,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -3780,7 +3780,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -3798,8 +3798,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1064-DPP-NEXT: .LBB6_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -3815,7 +3815,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3 @@ -3829,7 +3829,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -3856,7 +3856,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -3874,8 +3874,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1032-DPP-NEXT: .LBB6_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -3898,8 +3898,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -3924,7 +3924,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -3942,8 +3942,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1164-DPP-NEXT: .LBB6_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -3954,7 +3954,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -3966,7 +3966,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -3988,7 +3988,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -4005,8 +4005,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1132-DPP-NEXT: .LBB6_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -4019,18 +4019,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 @@ -4049,8 +4049,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -4077,19 +4077,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB7_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 -; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX7LESS-NEXT: .LBB7_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[41:42] @@ -4097,8 +4097,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 @@ -4108,24 +4108,24 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB7_4 ; GFX7LESS-NEXT: .LBB7_5: ; GFX7LESS-NEXT: s_endpgm @@ -4152,7 +4152,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -4192,7 +4192,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX9-NEXT: .LBB7_4: ; %atomicrmw.start @@ -4210,7 +4210,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -4233,8 +4233,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB7_4 ; GFX9-NEXT: .LBB7_5: ; GFX9-NEXT: s_endpgm @@ -4261,7 +4261,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -4301,7 +4301,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX1064-NEXT: .LBB7_4: ; %atomicrmw.start @@ -4325,7 +4325,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -4343,8 +4343,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1064-NEXT: .LBB7_5: ; GFX1064-NEXT: s_endpgm @@ -4371,7 +4371,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -4402,7 +4402,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-NEXT: s_cbranch_scc1 .LBB7_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 @@ -4434,7 +4434,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -4452,8 +4452,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1032-NEXT: .LBB7_5: ; GFX1032-NEXT: s_endpgm @@ -4481,7 +4481,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v2, 0 @@ -4514,7 +4514,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 @@ -4535,7 +4535,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -4553,8 +4553,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1164-NEXT: .LBB7_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -4583,7 +4583,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v2, 0 @@ -4606,7 +4606,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-NEXT: s_cbranch_scc1 .LBB7_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -4635,7 +4635,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-NEXT: v_mov_b32_e32 v3, s53 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -4653,8 +4653,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1132-NEXT: .LBB7_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -4663,22 +4663,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -4696,27 +4696,27 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[52:55], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1] ; GFX7LESS-DPP-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 @@ -4726,36 +4726,36 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB7_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s82, -1 -; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s70, -1 +; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11 +; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 @@ -4770,17 +4770,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4841,10 +4841,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55] ; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53] @@ -4856,32 +4856,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] @@ -4912,7 +4912,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -4976,7 +4976,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1064-DPP-NEXT: ; %bb.1: ; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB7_2: ; %atomicrmw.start @@ -4999,7 +4999,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -5019,8 +5019,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1064-DPP-NEXT: .LBB7_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -5047,7 +5047,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -5098,7 +5098,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3 @@ -5128,7 +5128,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -5146,8 +5146,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1032-DPP-NEXT: .LBB7_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -5175,7 +5175,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5241,7 +5241,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: ; %bb.1: ; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -5262,7 +5262,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -5280,8 +5280,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1164-DPP-NEXT: .LBB7_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -5310,7 +5310,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -5356,7 +5356,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 @@ -5382,7 +5382,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -5399,8 +5399,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1132-DPP-NEXT: .LBB7_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -5750,12 +5750,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -5773,8 +5773,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -5831,12 +5831,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; ; GFX9-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -5852,9 +5852,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5905,13 +5905,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; ; GFX1064-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -5927,8 +5927,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5964,13 +5964,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; ; GFX1032-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -5986,8 +5986,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6162,19 +6162,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -6187,11 +6187,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] ; GFX7LESS-DPP-NEXT: .LBB9_1: ; %atomicrmw.start @@ -6204,7 +6204,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -6217,12 +6217,12 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; ; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -6238,9 +6238,9 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6325,13 +6325,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; ; GFX1064-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -6347,8 +6347,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6409,13 +6409,13 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; ; GFX1032-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -6431,8 +6431,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6701,11 +6701,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 -; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 @@ -6737,7 +6737,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 @@ -6753,8 +6753,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_2 ; GFX7LESS-NEXT: .LBB10_3: ; GFX7LESS-NEXT: s_endpgm @@ -6785,10 +6785,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-NEXT: .LBB10_2: ; %atomicrmw.start @@ -6806,7 +6806,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -6829,8 +6829,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB10_2 ; GFX9-NEXT: .LBB10_3: ; GFX9-NEXT: s_endpgm @@ -6860,8 +6860,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) @@ -6888,7 +6888,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -6906,8 +6906,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1064-NEXT: .LBB10_3: ; GFX1064-NEXT: s_endpgm @@ -6923,7 +6923,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: s_addc_u32 s65, s65, 0 ; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB10_3 @@ -6937,7 +6937,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) @@ -6964,7 +6964,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -6982,8 +6982,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1032-NEXT: .LBB10_3: ; GFX1032-NEXT: s_endpgm @@ -7006,8 +7006,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -7032,7 +7032,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -7050,8 +7050,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1164-NEXT: .LBB10_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7062,7 +7062,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -7074,7 +7074,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -7096,7 +7096,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -7113,8 +7113,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1132-NEXT: .LBB10_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7141,11 +7141,11 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 @@ -7177,7 +7177,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 @@ -7193,8 +7193,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX7LESS-DPP-NEXT: .LBB10_3: ; GFX7LESS-DPP-NEXT: s_endpgm @@ -7225,10 +7225,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start @@ -7246,7 +7246,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[3:4], 4.0 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 @@ -7269,8 +7269,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX9-DPP-NEXT: .LBB10_3: ; GFX9-DPP-NEXT: s_endpgm @@ -7300,8 +7300,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -7328,7 +7328,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -7346,8 +7346,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1064-DPP-NEXT: .LBB10_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -7363,7 +7363,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3 @@ -7377,7 +7377,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -7404,7 +7404,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -7422,8 +7422,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1032-DPP-NEXT: .LBB10_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -7446,8 +7446,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -7472,7 +7472,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -7490,8 +7490,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1164-DPP-NEXT: .LBB10_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7502,7 +7502,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -7514,7 +7514,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -7536,7 +7536,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -7553,8 +7553,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1132-DPP-NEXT: .LBB10_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7567,18 +7567,18 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 @@ -7597,8 +7597,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -7625,19 +7625,19 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB11_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 -; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX7LESS-NEXT: .LBB11_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[0:1], v[2:3], v[41:42] @@ -7645,8 +7645,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 @@ -7656,24 +7656,24 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB11_4 ; GFX7LESS-NEXT: .LBB11_5: ; GFX7LESS-NEXT: s_endpgm @@ -7700,7 +7700,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7740,7 +7740,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX9-NEXT: .LBB11_4: ; %atomicrmw.start @@ -7758,7 +7758,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -7781,8 +7781,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB11_4 ; GFX9-NEXT: .LBB11_5: ; GFX9-NEXT: s_endpgm @@ -7809,7 +7809,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7849,7 +7849,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX1064-NEXT: .LBB11_4: ; %atomicrmw.start @@ -7873,7 +7873,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -7891,8 +7891,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1064-NEXT: .LBB11_5: ; GFX1064-NEXT: s_endpgm @@ -7919,7 +7919,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7950,7 +7950,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-NEXT: s_cbranch_scc1 .LBB11_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 @@ -7982,7 +7982,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -8000,8 +8000,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1032-NEXT: .LBB11_5: ; GFX1032-NEXT: s_endpgm @@ -8029,7 +8029,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v2, 0 @@ -8062,7 +8062,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 @@ -8083,7 +8083,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -8101,8 +8101,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1164-NEXT: .LBB11_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8131,7 +8131,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v2, 0 @@ -8154,7 +8154,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-NEXT: s_cbranch_scc1 .LBB11_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -8183,7 +8183,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-NEXT: v_mov_b32_e32 v3, s53 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -8201,8 +8201,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1132-NEXT: .LBB11_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8211,22 +8211,22 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -8244,27 +8244,27 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[52:55], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1] ; GFX7LESS-DPP-NEXT: .LBB11_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[0:1], v[41:42] ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 @@ -8274,36 +8274,36 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB11_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fmax_double_uni_address_div_value_default_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s82, -1 -; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s70, -1 +; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11 +; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 @@ -8318,17 +8318,17 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8389,10 +8389,10 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55] ; GFX9-DPP-NEXT: .LBB11_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53] @@ -8404,32 +8404,32 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_max_f64 v[3:4], v[5:6], v[3:4] -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] @@ -8460,7 +8460,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -8524,7 +8524,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX1064-DPP-NEXT: ; %bb.1: ; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB11_2: ; %atomicrmw.start @@ -8547,7 +8547,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -8567,8 +8567,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1064-DPP-NEXT: .LBB11_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -8595,7 +8595,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -8646,7 +8646,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3 @@ -8676,7 +8676,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -8694,8 +8694,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1032-DPP-NEXT: .LBB11_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -8723,7 +8723,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -8789,7 +8789,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: ; %bb.1: ; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -8810,7 +8810,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -8828,8 +8828,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1164-DPP-NEXT: .LBB11_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8858,7 +8858,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -8904,7 +8904,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 @@ -8930,7 +8930,7 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -8947,8 +8947,8 @@ define amdgpu_kernel void @global_atomic_fmax_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1132-DPP-NEXT: .LBB11_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll index 81a16df17c728..6b1d5253e178f 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll @@ -273,12 +273,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -296,8 +296,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -349,12 +349,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -370,9 +370,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -420,13 +420,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -442,8 +442,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -477,13 +477,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -499,8 +499,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -635,19 +635,19 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -660,11 +660,11 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 ; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start @@ -675,7 +675,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -687,12 +687,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -708,9 +708,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -778,13 +778,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -800,8 +800,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -851,13 +851,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fmin_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -873,8 +873,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1312,12 +1312,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -1335,8 +1335,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -1388,12 +1388,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; ; GFX9-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -1409,9 +1409,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1459,13 +1459,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; ; GFX1064-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -1481,8 +1481,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1516,13 +1516,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; ; GFX1032-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -1538,8 +1538,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1674,19 +1674,19 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -1699,11 +1699,11 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 ; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start @@ -1714,7 +1714,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -1726,12 +1726,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; ; GFX9-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -1747,9 +1747,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1817,13 +1817,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; ; GFX1064-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -1839,8 +1839,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1890,13 +1890,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; ; GFX1032-DPP-LABEL: global_atomic_fmin_uni_address_div_value_one_as_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -1912,8 +1912,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_one_as_scope ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2351,12 +2351,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX7LESS-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -2374,8 +2374,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -2427,12 +2427,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; ; GFX9-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -2448,9 +2448,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2498,13 +2498,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; ; GFX1064-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -2520,8 +2520,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2555,13 +2555,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; ; GFX1032-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -2577,8 +2577,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2713,19 +2713,19 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX7LESS-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -2738,11 +2738,11 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_mul_f32_e32 v2, 1.0, v0 ; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start @@ -2753,7 +2753,7 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -2765,12 +2765,12 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; ; GFX9-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -2786,9 +2786,9 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2856,13 +2856,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; ; GFX1064-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -2878,8 +2878,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2929,13 +2929,13 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; ; GFX1032-DPP-LABEL: global_atomic_fmin_uni_address_div_value_default_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -2951,8 +2951,8 @@ define amdgpu_kernel void @global_atomic_fmin_uni_address_div_value_default_scop ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3153,11 +3153,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 -; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 @@ -3189,7 +3189,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 @@ -3205,8 +3205,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB6_2 ; GFX7LESS-NEXT: .LBB6_3: ; GFX7LESS-NEXT: s_endpgm @@ -3237,10 +3237,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-NEXT: .LBB6_2: ; %atomicrmw.start @@ -3258,7 +3258,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -3281,8 +3281,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB6_2 ; GFX9-NEXT: .LBB6_3: ; GFX9-NEXT: s_endpgm @@ -3312,8 +3312,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) @@ -3340,7 +3340,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -3358,8 +3358,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1064-NEXT: .LBB6_3: ; GFX1064-NEXT: s_endpgm @@ -3375,7 +3375,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: s_addc_u32 s65, s65, 0 ; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB6_3 @@ -3389,7 +3389,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) @@ -3416,7 +3416,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -3434,8 +3434,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1032-NEXT: .LBB6_3: ; GFX1032-NEXT: s_endpgm @@ -3458,8 +3458,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -3484,7 +3484,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -3502,8 +3502,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1164-NEXT: .LBB6_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -3514,7 +3514,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -3526,7 +3526,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -3548,7 +3548,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -3565,8 +3565,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1132-NEXT: .LBB6_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -3593,11 +3593,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 @@ -3629,7 +3629,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 @@ -3645,8 +3645,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX7LESS-DPP-NEXT: .LBB6_3: ; GFX7LESS-DPP-NEXT: s_endpgm @@ -3677,10 +3677,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-DPP-NEXT: .LBB6_2: ; %atomicrmw.start @@ -3698,7 +3698,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 @@ -3721,8 +3721,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX9-DPP-NEXT: .LBB6_3: ; GFX9-DPP-NEXT: s_endpgm @@ -3752,8 +3752,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -3780,7 +3780,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -3798,8 +3798,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1064-DPP-NEXT: .LBB6_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -3815,7 +3815,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB6_3 @@ -3829,7 +3829,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -3856,7 +3856,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -3874,8 +3874,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1032-DPP-NEXT: .LBB6_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -3898,8 +3898,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -3924,7 +3924,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -3942,8 +3942,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1164-DPP-NEXT: .LBB6_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -3954,7 +3954,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -3966,7 +3966,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -3988,7 +3988,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -4005,8 +4005,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB6_2 ; GFX1132-DPP-NEXT: .LBB6_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -4019,18 +4019,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 @@ -4049,8 +4049,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -4077,19 +4077,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB7_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 -; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX7LESS-NEXT: .LBB7_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], v[41:42] @@ -4097,8 +4097,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 @@ -4108,24 +4108,24 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB7_4 ; GFX7LESS-NEXT: .LBB7_5: ; GFX7LESS-NEXT: s_endpgm @@ -4152,7 +4152,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -4192,7 +4192,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX9-NEXT: .LBB7_4: ; %atomicrmw.start @@ -4210,7 +4210,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -4233,8 +4233,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB7_4 ; GFX9-NEXT: .LBB7_5: ; GFX9-NEXT: s_endpgm @@ -4261,7 +4261,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -4301,7 +4301,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX1064-NEXT: .LBB7_4: ; %atomicrmw.start @@ -4325,7 +4325,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -4343,8 +4343,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1064-NEXT: .LBB7_5: ; GFX1064-NEXT: s_endpgm @@ -4371,7 +4371,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -4402,7 +4402,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-NEXT: s_cbranch_scc1 .LBB7_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 @@ -4434,7 +4434,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -4452,8 +4452,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1032-NEXT: .LBB7_5: ; GFX1032-NEXT: s_endpgm @@ -4481,7 +4481,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v2, 0 @@ -4514,7 +4514,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 @@ -4535,7 +4535,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -4553,8 +4553,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1164-NEXT: .LBB7_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -4583,7 +4583,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v2, 0 @@ -4606,7 +4606,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-NEXT: s_cbranch_scc1 .LBB7_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -4635,7 +4635,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-NEXT: v_mov_b32_e32 v3, s53 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -4653,8 +4653,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB7_4 ; GFX1132-NEXT: .LBB7_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -4663,22 +4663,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -4696,27 +4696,27 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[52:55], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1] ; GFX7LESS-DPP-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 @@ -4726,36 +4726,36 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB7_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s82, -1 -; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s70, -1 +; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11 +; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 @@ -4770,17 +4770,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4841,10 +4841,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55] ; GFX9-DPP-NEXT: .LBB7_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53] @@ -4856,32 +4856,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4] -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] @@ -4912,7 +4912,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -4976,7 +4976,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB7_3 ; GFX1064-DPP-NEXT: ; %bb.1: ; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB7_2: ; %atomicrmw.start @@ -4999,7 +4999,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -5019,8 +5019,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1064-DPP-NEXT: .LBB7_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -5047,7 +5047,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -5098,7 +5098,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB7_3 @@ -5128,7 +5128,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -5146,8 +5146,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1032-DPP-NEXT: .LBB7_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -5175,7 +5175,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -5241,7 +5241,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: ; %bb.1: ; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -5262,7 +5262,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -5280,8 +5280,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1164-DPP-NEXT: .LBB7_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -5310,7 +5310,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -5356,7 +5356,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 @@ -5382,7 +5382,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -5399,8 +5399,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB7_2 ; GFX1132-DPP-NEXT: .LBB7_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -5750,12 +5750,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -5773,8 +5773,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -5831,12 +5831,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; ; GFX9-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -5852,9 +5852,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5905,13 +5905,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; ; GFX1064-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -5927,8 +5927,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5964,13 +5964,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; ; GFX1032-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -5986,8 +5986,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6162,19 +6162,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -6187,11 +6187,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: v_max_f64 v[4:5], v[0:1], v[0:1] ; GFX7LESS-DPP-NEXT: .LBB9_1: ; %atomicrmw.start @@ -6204,7 +6204,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v1 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v0 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[2:3] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -6217,12 +6217,12 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; ; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -6238,9 +6238,9 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6325,13 +6325,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; ; GFX1064-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -6347,8 +6347,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6409,13 +6409,13 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; ; GFX1032-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_one_as_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -6431,8 +6431,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6701,11 +6701,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 -; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 @@ -6737,7 +6737,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 @@ -6753,8 +6753,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_2 ; GFX7LESS-NEXT: .LBB10_3: ; GFX7LESS-NEXT: s_endpgm @@ -6785,10 +6785,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-NEXT: .LBB10_2: ; %atomicrmw.start @@ -6806,7 +6806,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -6829,8 +6829,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB10_2 ; GFX9-NEXT: .LBB10_3: ; GFX9-NEXT: s_endpgm @@ -6860,8 +6860,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) @@ -6888,7 +6888,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -6906,8 +6906,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1064-NEXT: .LBB10_3: ; GFX1064-NEXT: s_endpgm @@ -6923,7 +6923,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-NEXT: s_addc_u32 s65, s65, 0 ; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB10_3 @@ -6937,7 +6937,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) @@ -6964,7 +6964,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -6982,8 +6982,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1032-NEXT: .LBB10_3: ; GFX1032-NEXT: s_endpgm @@ -7006,8 +7006,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -7032,7 +7032,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -7050,8 +7050,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1164-NEXT: .LBB10_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7062,7 +7062,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -7074,7 +7074,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -7096,7 +7096,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -7113,8 +7113,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1132-NEXT: .LBB10_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7141,11 +7141,11 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 @@ -7177,7 +7177,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 @@ -7193,8 +7193,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX7LESS-DPP-NEXT: .LBB10_3: ; GFX7LESS-DPP-NEXT: s_endpgm @@ -7225,10 +7225,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start @@ -7246,7 +7246,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[3:4], 4.0 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 @@ -7269,8 +7269,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX9-DPP-NEXT: .LBB10_3: ; GFX9-DPP-NEXT: s_endpgm @@ -7300,8 +7300,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -7328,7 +7328,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -7346,8 +7346,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1064-DPP-NEXT: .LBB10_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -7363,7 +7363,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3 @@ -7377,7 +7377,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -7404,7 +7404,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -7422,8 +7422,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1032-DPP-NEXT: .LBB10_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -7446,8 +7446,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -7472,7 +7472,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -7490,8 +7490,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1164-DPP-NEXT: .LBB10_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7502,7 +7502,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -7514,7 +7514,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -7536,7 +7536,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -7553,8 +7553,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1132-DPP-NEXT: .LBB10_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7567,18 +7567,18 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 @@ -7597,8 +7597,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -7625,19 +7625,19 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB11_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 -; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] ; GFX7LESS-NEXT: .LBB11_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_max_f64 v[2:3], v[0:1], v[0:1] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_min_f64 v[0:1], v[2:3], v[41:42] @@ -7645,8 +7645,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-NEXT: v_mov_b32_e32 v0, 8 @@ -7656,24 +7656,24 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB11_4 ; GFX7LESS-NEXT: .LBB11_5: ; GFX7LESS-NEXT: s_endpgm @@ -7700,7 +7700,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7740,7 +7740,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX9-NEXT: .LBB11_4: ; %atomicrmw.start @@ -7758,7 +7758,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -7781,8 +7781,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB11_4 ; GFX9-NEXT: .LBB11_5: ; GFX9-NEXT: s_endpgm @@ -7809,7 +7809,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7849,7 +7849,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 ; GFX1064-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: global_load_dwordx2 v[4:5], v0, s[52:53] ; GFX1064-NEXT: .LBB11_4: ; %atomicrmw.start @@ -7873,7 +7873,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -7891,8 +7891,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1064-NEXT: .LBB11_5: ; GFX1064-NEXT: s_endpgm @@ -7919,7 +7919,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7950,7 +7950,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-NEXT: s_cbranch_scc1 .LBB11_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 @@ -7982,7 +7982,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -8000,8 +8000,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-NEXT: buffer_load_dword v5, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1032-NEXT: .LBB11_5: ; GFX1032-NEXT: s_endpgm @@ -8029,7 +8029,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v2, 0 @@ -8062,7 +8062,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 ; GFX1164-NEXT: v_max_f64 v[41:42], v[2:3], v[2:3] -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: global_load_b64 v[4:5], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 @@ -8083,7 +8083,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -8101,8 +8101,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1164-NEXT: .LBB11_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8131,7 +8131,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v2, 0 @@ -8154,7 +8154,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-NEXT: s_cbranch_scc1 .LBB11_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -8183,7 +8183,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-NEXT: v_mov_b32_e32 v3, s53 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -8201,8 +8201,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB11_4 ; GFX1132-NEXT: .LBB11_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8211,22 +8211,22 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -8244,27 +8244,27 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[64:67], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[2:3], off, s[52:55], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: v_max_f64 v[41:42], v[0:1], v[0:1] ; GFX7LESS-DPP-NEXT: .LBB11_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_max_f64 v[0:1], v[2:3], v[2:3] -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: v_min_f64 v[0:1], v[0:1], v[41:42] ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX7LESS-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v0, 8 @@ -8274,36 +8274,36 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v3, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB11_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fmin_double_uni_address_div_value_default_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s82, -1 -; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s70, -1 +; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11 +; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 @@ -8318,17 +8318,17 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8389,10 +8389,10 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55] ; GFX9-DPP-NEXT: .LBB11_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-DPP-NEXT: v_max_f64 v[3:4], s[52:53], s[52:53] @@ -8404,32 +8404,32 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: v_min_f64 v[3:4], v[5:6], v[3:4] -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] @@ -8460,7 +8460,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -8524,7 +8524,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB11_3 ; GFX1064-DPP-NEXT: ; %bb.1: ; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB11_2: ; %atomicrmw.start @@ -8547,7 +8547,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -8567,8 +8567,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1064-DPP-NEXT: .LBB11_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -8595,7 +8595,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -8646,7 +8646,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v3, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB11_3 @@ -8676,7 +8676,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -8694,8 +8694,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1032-DPP-NEXT: .LBB11_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -8723,7 +8723,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -8789,7 +8789,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: ; %bb.1: ; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-DPP-NEXT: v_max_f64 v[41:42], v[3:4], v[3:4] -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -8810,7 +8810,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -8828,8 +8828,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1164-DPP-NEXT: .LBB11_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8858,7 +8858,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -8904,7 +8904,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, v8 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v4, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 @@ -8930,7 +8930,7 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -8947,8 +8947,8 @@ define amdgpu_kernel void @global_atomic_fmin_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB11_2 ; GFX1132-DPP-NEXT: .LBB11_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll index 416ce5a031810..d575605f102b7 100644 --- a/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll +++ b/llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll @@ -425,12 +425,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -448,8 +448,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -497,12 +497,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -518,9 +518,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -564,13 +564,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -586,8 +586,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -631,13 +631,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -653,8 +653,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -821,19 +821,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -846,11 +846,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB1_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -859,7 +859,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -871,12 +871,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -892,9 +892,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -953,13 +953,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -975,8 +975,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1030,13 +1030,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_align4_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -1052,8 +1052,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1757,12 +1757,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -1780,8 +1780,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -1829,12 +1829,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; ; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -1850,9 +1850,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1896,13 +1896,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -1918,8 +1918,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -1963,13 +1963,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -1985,8 +1985,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2153,19 +2153,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -2178,11 +2178,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB3_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -2191,7 +2191,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -2203,12 +2203,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; ; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -2224,9 +2224,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2285,13 +2285,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -2307,8 +2307,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -2362,13 +2362,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -2384,8 +2384,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_one_as_scope ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3089,12 +3089,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -3112,8 +3112,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -3161,12 +3161,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -3182,9 +3182,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3228,13 +3228,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -3250,8 +3250,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3295,13 +3295,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -3317,8 +3317,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3485,19 +3485,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -3510,11 +3510,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB5_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -3523,7 +3523,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -3535,12 +3535,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -3556,9 +3556,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3617,13 +3617,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -3639,8 +3639,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3694,13 +3694,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -3716,8 +3716,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -3917,12 +3917,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -3940,8 +3940,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -3989,12 +3989,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -4010,9 +4010,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4056,13 +4056,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -4078,8 +4078,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4123,13 +4123,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -4145,8 +4145,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4313,19 +4313,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -4338,11 +4338,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -4351,7 +4351,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -4363,12 +4363,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -4384,9 +4384,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4445,13 +4445,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -4467,8 +4467,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -4522,13 +4522,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -4544,8 +4544,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_agent_scope_ ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5248,12 +5248,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX7LESS-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -5271,8 +5271,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -5320,12 +5320,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; ; GFX9-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -5341,9 +5341,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5387,13 +5387,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; ; GFX1064-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -5409,8 +5409,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5454,13 +5454,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; ; GFX1032-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -5476,8 +5476,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5644,19 +5644,19 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX7LESS-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -5669,11 +5669,11 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v2, off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB8_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -5682,7 +5682,7 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v4, v2 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, v1 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap v[3:4], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, v3, v2 ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -5694,12 +5694,12 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; ; GFX9-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -5715,9 +5715,9 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5776,13 +5776,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; ; GFX1064-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -5798,8 +5798,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -5853,13 +5853,13 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; ; GFX1032-DPP-LABEL: global_atomic_fsub_uni_address_div_value_default_scope_strictfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -5875,8 +5875,8 @@ define amdgpu_kernel void @global_atomic_fsub_uni_address_div_value_default_scop ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -6081,7 +6081,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 ; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 ; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec ; GFX7LESS-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0 @@ -6097,7 +6097,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_bcnt1_i32_b64 s2, s[0:1] -; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 @@ -6131,7 +6131,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 @@ -6148,8 +6148,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB9_2 ; GFX7LESS-NEXT: .LBB9_3: ; GFX7LESS-NEXT: s_endpgm @@ -6166,7 +6166,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX9-NEXT: s_add_u32 s64, s64, s11 ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3 ; GFX9-NEXT: s_addc_u32 s65, s65, 0 -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-NEXT: s_movk_i32 s32, 0x800 ; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -6187,7 +6187,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v4, s1 -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: .LBB9_2: ; %atomicrmw.start @@ -6206,7 +6206,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 ; GFX9-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -6227,8 +6227,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX9-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB9_2 ; GFX9-NEXT: .LBB9_3: ; GFX9-NEXT: s_endpgm @@ -6258,8 +6258,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1064-NEXT: s_mov_b32 s51, s8 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 @@ -6290,7 +6290,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -6307,8 +6307,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1064-NEXT: .LBB9_3: ; GFX1064-NEXT: s_endpgm @@ -6326,7 +6326,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-NEXT: s_addc_u32 s65, s65, 0 ; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_cbranch_execz .LBB9_3 @@ -6338,7 +6338,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1032-NEXT: s_mov_b32 s51, s8 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 @@ -6369,7 +6369,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -6386,8 +6386,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1032-NEXT: .LBB9_3: ; GFX1032-NEXT: s_endpgm @@ -6413,8 +6413,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6444,7 +6444,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -6458,8 +6458,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1164-NEXT: .LBB9_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6471,7 +6471,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 ; GFX1132-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -6485,7 +6485,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6511,7 +6511,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -6524,8 +6524,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1132-NEXT: .LBB9_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6540,7 +6540,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 ; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], exec ; GFX7LESS-DPP-NEXT: v_mbcnt_lo_u32_b32_e64 v3, s0, 0 @@ -6556,7 +6556,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_bcnt1_i32_b64 s2, s[0:1] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v3, 20, v2 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 @@ -6590,7 +6590,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 @@ -6607,8 +6607,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX7LESS-DPP-NEXT: .LBB9_3: ; GFX7LESS-DPP-NEXT: s_endpgm @@ -6625,7 +6625,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: s_add_u32 s64, s64, s11 ; GFX9-DPP-NEXT: v_mbcnt_hi_u32_b32 v3, s1, v3 ; GFX9-DPP-NEXT: s_addc_u32 s65, s65, 0 -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[2:3], vcc @@ -6646,7 +6646,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: .LBB9_2: ; %atomicrmw.start @@ -6665,7 +6665,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:12 ; GFX9-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 @@ -6686,8 +6686,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX9-DPP-NEXT: buffer_load_dword v4, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX9-DPP-NEXT: .LBB9_3: ; GFX9-DPP-NEXT: s_endpgm @@ -6717,8 +6717,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 @@ -6749,7 +6749,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -6766,8 +6766,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1064-DPP-NEXT: .LBB9_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -6785,7 +6785,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3 ; GFX1032-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB9_3 @@ -6797,7 +6797,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], v[3:4], 4.0 @@ -6828,7 +6828,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -6845,8 +6845,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1032-DPP-NEXT: .LBB9_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -6872,8 +6872,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6903,7 +6903,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -6917,8 +6917,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1164-DPP-NEXT: .LBB9_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6930,7 +6930,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v0, s8, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -6944,7 +6944,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -6970,7 +6970,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -6983,8 +6983,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_agent ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB9_2 ; GFX1132-DPP-NEXT: .LBB9_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -6997,18 +6997,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 @@ -7027,8 +7027,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -7053,21 +7053,21 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB10_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 -; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: .LBB10_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -7081,25 +7081,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB10_4 ; GFX7LESS-NEXT: .LBB10_5: ; GFX7LESS-NEXT: s_endpgm @@ -7126,7 +7126,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7163,7 +7163,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX9-NEXT: .LBB10_4: ; %atomicrmw.start @@ -7182,7 +7182,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 ; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -7203,8 +7203,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB10_4 ; GFX9-NEXT: .LBB10_5: ; GFX9-NEXT: s_endpgm @@ -7231,7 +7231,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7268,7 +7268,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-NEXT: .LBB10_4: ; %atomicrmw.start @@ -7292,7 +7292,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -7309,8 +7309,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1064-NEXT: .LBB10_5: ; GFX1064-NEXT: s_endpgm @@ -7337,7 +7337,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7366,7 +7366,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: s_cbranch_scc1 .LBB10_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 @@ -7397,7 +7397,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -7414,8 +7414,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1032-NEXT: .LBB10_5: ; GFX1032-NEXT: s_endpgm @@ -7443,7 +7443,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v41, 0 @@ -7473,7 +7473,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 @@ -7497,7 +7497,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -7511,8 +7511,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1164-NEXT: .LBB10_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7541,7 +7541,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v41, 0 @@ -7561,7 +7561,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-NEXT: s_cbranch_scc1 .LBB10_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -7590,7 +7590,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -7603,8 +7603,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB10_4 ; GFX1132-NEXT: .LBB10_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -7613,22 +7613,22 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -7646,23 +7646,23 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1 -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[40:41] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -7676,37 +7676,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB10_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_align4_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s82, -1 -; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s70, -1 +; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11 +; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 @@ -7721,17 +7721,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -7785,10 +7785,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55] ; GFX9-DPP-NEXT: .LBB10_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) @@ -7799,31 +7799,31 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] @@ -7854,7 +7854,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -7910,7 +7910,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1064-DPP-NEXT: ; %bb.1: ; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB10_2: ; %atomicrmw.start @@ -7934,7 +7934,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -7951,8 +7951,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1064-DPP-NEXT: .LBB10_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -7979,7 +7979,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -8024,7 +8024,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB10_3 @@ -8053,7 +8053,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -8070,8 +8070,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1032-DPP-NEXT: .LBB10_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -8099,7 +8099,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -8154,7 +8154,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB10_3 ; GFX1164-DPP-NEXT: ; %bb.1: ; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -8178,7 +8178,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -8192,8 +8192,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1164-DPP-NEXT: .LBB10_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8222,7 +8222,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -8264,7 +8264,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB10_3 @@ -8290,7 +8290,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -8303,8 +8303,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB10_2 ; GFX1132-DPP-NEXT: .LBB10_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -8835,12 +8835,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -8858,8 +8858,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -8912,12 +8912,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; ; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -8933,9 +8933,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -8982,13 +8982,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; ; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -9004,8 +9004,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9052,13 +9052,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -9074,8 +9074,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9249,19 +9249,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -9274,11 +9274,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB12_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -9289,7 +9289,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -9302,12 +9302,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; ; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -9323,9 +9323,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9401,13 +9401,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; ; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -9423,8 +9423,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -9490,13 +9490,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_one_as_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -9512,8 +9512,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_one_a ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10268,12 +10268,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -10291,8 +10291,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -10345,12 +10345,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -10366,9 +10366,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10415,13 +10415,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -10437,8 +10437,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10485,13 +10485,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -10507,8 +10507,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10682,19 +10682,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -10707,11 +10707,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB14_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -10722,7 +10722,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -10735,12 +10735,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -10756,9 +10756,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10834,13 +10834,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -10856,8 +10856,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -10923,13 +10923,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -10945,8 +10945,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11183,12 +11183,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s50, -1 -; GFX7LESS-NEXT: s_mov_b32 s51, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s48, s48, s11 -; GFX7LESS-NEXT: s_addc_u32 s49, s49, 0 +; GFX7LESS-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s38, -1 +; GFX7LESS-NEXT: s_mov_b32 s39, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s36, s36, s11 +; GFX7LESS-NEXT: s_addc_u32 s37, s37, 0 ; GFX7LESS-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-NEXT: s_mov_b32 s12, s8 @@ -11206,8 +11206,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -11260,12 +11260,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX9-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-NEXT: s_mov_b32 s12, s8 ; GFX9-NEXT: s_add_u32 s8, s34, 44 @@ -11281,9 +11281,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11330,13 +11330,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1064-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064: ; %bb.0: -; GFX1064-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-NEXT: s_mov_b32 s50, -1 -; GFX1064-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-NEXT: s_mov_b32 s38, -1 +; GFX1064-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-NEXT: s_mov_b32 s12, s8 ; GFX1064-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-NEXT: s_mov_b32 s13, s9 @@ -11352,8 +11352,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-NEXT: s_mov_b32 s32, 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11400,13 +11400,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1032-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032: ; %bb.0: -; GFX1032-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-NEXT: s_mov_b32 s50, -1 -; GFX1032-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-NEXT: s_mov_b32 s38, -1 +; GFX1032-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-NEXT: s_mov_b32 s12, s8 ; GFX1032-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-NEXT: s_mov_b32 s13, s9 @@ -11422,8 +11422,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-NEXT: s_mov_b32 s32, 0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11597,19 +11597,19 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_mov_b32 s32, 0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s49, s49, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[48:49], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s51, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s50, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s39, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s38, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s4, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s5, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[4:5] @@ -11622,11 +11622,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v31, v0, v2 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[48:51], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[4:5], off, s[36:39], 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], 0 ; GFX7LESS-DPP-NEXT: .LBB15_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 @@ -11637,7 +11637,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v8, v4 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, v3 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, v2 -; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[48:51], 0 glc +; GFX7LESS-DPP-NEXT: buffer_atomic_cmpswap_x2 v[6:9], off, s[36:39], 0 glc ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_cmp_eq_u64_e32 vcc, v[6:7], v[4:5] ; GFX7LESS-DPP-NEXT: s_or_b64 s[0:1], vcc, s[0:1] @@ -11650,12 +11650,12 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s50, -1 -; GFX9-DPP-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s48, s48, s11 -; GFX9-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s38, -1 +; GFX9-DPP-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s36, s36, s11 +; GFX9-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s12, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s34, 44 @@ -11671,9 +11671,9 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-DPP-NEXT: s_mov_b32 s32, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11749,13 +11749,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1064-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1064-DPP: ; %bb.0: -; GFX1064-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1064-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1064-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1064-DPP-NEXT: s_mov_b32 s51, 0x31e16000 -; GFX1064-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1064-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1064-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1064-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1064-DPP-NEXT: s_mov_b32 s39, 0x31e16000 +; GFX1064-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1064-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1064-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1064-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1064-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s9 @@ -11771,8 +11771,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1064-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -11838,13 +11838,13 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; ; GFX1032-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_agent_scope_unsafe_structfp: ; GFX1032-DPP: ; %bb.0: -; GFX1032-DPP-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX1032-DPP-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX1032-DPP-NEXT: s_mov_b32 s50, -1 -; GFX1032-DPP-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX1032-DPP-NEXT: s_add_u32 s48, s48, s11 +; GFX1032-DPP-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX1032-DPP-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX1032-DPP-NEXT: s_mov_b32 s38, -1 +; GFX1032-DPP-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX1032-DPP-NEXT: s_add_u32 s36, s36, s11 ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[4:5] -; GFX1032-DPP-NEXT: s_addc_u32 s49, s49, 0 +; GFX1032-DPP-NEXT: s_addc_u32 s37, s37, 0 ; GFX1032-DPP-NEXT: s_mov_b32 s12, s8 ; GFX1032-DPP-NEXT: s_add_u32 s8, s34, 44 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s9 @@ -11860,8 +11860,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_agent ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX1032-DPP-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b32 s32, 0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -12121,11 +12121,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 -; GFX7LESS-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-NEXT: v_or_b32_e32 v3, v0, v1 @@ -12155,7 +12155,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 @@ -12172,8 +12172,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB16_2 ; GFX7LESS-NEXT: .LBB16_3: ; GFX7LESS-NEXT: s_endpgm @@ -12210,10 +12210,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX9-NEXT: s_mov_b32 s51, s8 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-NEXT: .LBB16_2: ; %atomicrmw.start @@ -12232,7 +12232,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 ; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -12253,8 +12253,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB16_2 ; GFX9-NEXT: .LBB16_3: ; GFX9-NEXT: s_endpgm @@ -12288,8 +12288,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1064-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) @@ -12316,7 +12316,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -12333,8 +12333,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1064-NEXT: .LBB16_3: ; GFX1064-NEXT: s_endpgm @@ -12350,7 +12350,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-NEXT: s_bcnt1_i32_b32 s0, exec_lo ; GFX1032-NEXT: s_mov_b32 s1, 0x43300000 -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1] ; GFX1032-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4] @@ -12368,7 +12368,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1032-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-NEXT: s_waitcnt lgkmcnt(0) @@ -12395,7 +12395,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -12412,8 +12412,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1032-NEXT: .LBB16_3: ; GFX1032-NEXT: s_endpgm @@ -12447,8 +12447,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1164-NEXT: s_mov_b32 s51, s8 ; GFX1164-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) @@ -12476,7 +12476,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -12490,8 +12490,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1164-NEXT: .LBB16_3: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12504,7 +12504,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1132-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0 ; GFX1132-NEXT: v_mov_b32_e32 v0, 0x43300000 -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_clause 0x1 ; GFX1132-NEXT: scratch_store_b32 off, v0, off offset:20 ; GFX1132-NEXT: scratch_store_b32 off, v1, off offset:16 @@ -12525,7 +12525,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1132-NEXT: s_mov_b32 s51, s13 ; GFX1132-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) @@ -12549,7 +12549,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -12562,8 +12562,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1132-NEXT: .LBB16_3: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12596,11 +12596,11 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GFX7LESS-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX7LESS-DPP-NEXT: v_or_b32_e32 v3, v0, v1 @@ -12630,7 +12630,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 @@ -12647,8 +12647,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX7LESS-DPP-NEXT: .LBB16_3: ; GFX7LESS-DPP-NEXT: s_endpgm @@ -12685,10 +12685,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s1 -; GFX9-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX9-DPP-NEXT: .LBB16_2: ; %atomicrmw.start @@ -12707,7 +12707,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 ; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 @@ -12728,8 +12728,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX9-DPP-NEXT: .LBB16_3: ; GFX9-DPP-NEXT: s_endpgm @@ -12763,8 +12763,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -12791,7 +12791,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -12808,8 +12808,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1064-DPP-NEXT: .LBB16_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -12825,7 +12825,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX1032-DPP-NEXT: s_bcnt1_i32_b32 s0, exec_lo ; GFX1032-DPP-NEXT: s_mov_b32 s1, 0x43300000 -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: v_add_f64 v[3:4], 0xc3300000, s[0:1] ; GFX1032-DPP-NEXT: s_movk_i32 s32, 0x400 ; GFX1032-DPP-NEXT: v_mul_f64 v[41:42], 4.0, v[3:4] @@ -12843,7 +12843,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v4, v3 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1032-DPP-NEXT: s_load_dwordx2 s[0:1], s[52:53], 0x0 ; GFX1032-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -12870,7 +12870,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -12887,8 +12887,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1032-DPP-NEXT: .LBB16_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -12922,8 +12922,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: s_mov_b32 s51, s8 ; GFX1164-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1164-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -12951,7 +12951,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -12965,8 +12965,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1164-DPP-NEXT: .LBB16_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -12979,7 +12979,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v1, s0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v0, 0x43300000 -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_clause 0x1 ; GFX1132-DPP-NEXT: scratch_store_b32 off, v0, off offset:20 ; GFX1132-DPP-NEXT: scratch_store_b32 off, v1, off offset:16 @@ -13000,7 +13000,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: s_mov_b32 s51, s13 ; GFX1132-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX1132-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_load_b64 s[0:1], s[52:53], 0x0 ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) @@ -13024,7 +13024,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -13037,8 +13037,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_uni_value_defau ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB16_2 ; GFX1132-DPP-NEXT: .LBB16_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -13051,18 +13051,18 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX7LESS: ; %bb.0: ; GFX7LESS-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-NEXT: s_mov_b64 s[48:49], s[0:1] ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 @@ -13081,8 +13081,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: s_mov_b64 s[0:1], exec @@ -13107,21 +13107,21 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX7LESS-NEXT: s_cbranch_execz .LBB17_5 ; GFX7LESS-NEXT: ; %bb.3: -; GFX7LESS-NEXT: s_load_dwordx2 s[64:65], s[36:37], 0x9 -; GFX7LESS-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x9 +; GFX7LESS-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) -; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 -; GFX7LESS-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0 +; GFX7LESS-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-NEXT: .LBB17_4: ; %atomicrmw.start ; GFX7LESS-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-NEXT: v_add_f64 v[2:3], v[0:1], -v[41:42] -; GFX7LESS-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 -; GFX7LESS-NEXT: buffer_store_dword v0, off, s[80:83], 0 +; GFX7LESS-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 -; GFX7LESS-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 +; GFX7LESS-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -13135,25 +13135,25 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-NEXT: v_mov_b32_e32 v31, v40 -; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-NEXT: buffer_load_dword v0, off, s[80:83], 0 -; GFX7LESS-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-NEXT: s_cbranch_execnz .LBB17_4 ; GFX7LESS-NEXT: .LBB17_5: ; GFX7LESS-NEXT: s_endpgm @@ -13180,7 +13180,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-NEXT: s_mov_b32 s33, s10 ; GFX9-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -13217,7 +13217,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-NEXT: ; %bb.3: ; GFX9-NEXT: s_load_dwordx2 s[52:53], s[36:37], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_mov_b64 s[62:63], 0 +; GFX9-NEXT: s_mov_b64 s[54:55], 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX9-NEXT: .LBB17_4: ; %atomicrmw.start @@ -13236,7 +13236,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-NEXT: buffer_store_dword v4, off, s[64:67], 0 offset:12 ; GFX9-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:8 -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s51 ; GFX9-NEXT: s_mov_b32 s13, s50 @@ -13257,8 +13257,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX9-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX9-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX9-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX9-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX9-NEXT: s_cbranch_execnz .LBB17_4 ; GFX9-NEXT: .LBB17_5: ; GFX9-NEXT: s_endpgm @@ -13285,7 +13285,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-NEXT: s_mov_b32 s33, s10 ; GFX1064-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -13322,7 +13322,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-NEXT: ; %bb.3: ; GFX1064-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 ; GFX1064-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-NEXT: .LBB17_4: ; %atomicrmw.start @@ -13346,7 +13346,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-NEXT: s_mov_b32 s12, s51 ; GFX1064-NEXT: s_mov_b32 s13, s50 @@ -13363,8 +13363,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1064-NEXT: .LBB17_5: ; GFX1064-NEXT: s_endpgm @@ -13391,7 +13391,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-NEXT: s_mov_b32 s33, s10 ; GFX1032-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -13420,7 +13420,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-NEXT: s_cbranch_scc1 .LBB17_1 ; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1032-NEXT: s_mov_b32 s62, 0 +; GFX1032-NEXT: s_mov_b32 s54, 0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 ; GFX1032-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-NEXT: s_xor_b32 s0, exec_lo, s0 @@ -13451,7 +13451,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-NEXT: s_mov_b32 s12, s51 ; GFX1032-NEXT: s_mov_b32 s13, s50 @@ -13468,8 +13468,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1032-NEXT: .LBB17_5: ; GFX1032-NEXT: s_endpgm @@ -13497,7 +13497,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-NEXT: s_mov_b32 s14, s33 ; GFX1164-NEXT: s_mov_b32 s32, 32 ; GFX1164-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-NEXT: v_mov_b32_e32 v41, 0 @@ -13527,7 +13527,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-NEXT: ; %bb.3: ; GFX1164-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 ; GFX1164-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x1 @@ -13551,7 +13551,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-NEXT: s_mov_b32 s12, s51 ; GFX1164-NEXT: s_mov_b32 s13, s50 @@ -13565,8 +13565,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1164-NEXT: .LBB17_5: ; GFX1164-NEXT: s_set_inst_prefetch_distance 0x2 @@ -13595,7 +13595,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-NEXT: s_mov_b32 s32, 32 ; GFX1132-NEXT: s_mov_b32 s33, s15 ; GFX1132-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-NEXT: v_mov_b32_e32 v41, 0 @@ -13615,7 +13615,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-NEXT: s_cbranch_scc1 .LBB17_1 ; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0 -; GFX1132-NEXT: s_mov_b32 s62, 0 +; GFX1132-NEXT: s_mov_b32 s54, 0 ; GFX1132-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v0 @@ -13644,7 +13644,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-NEXT: s_mov_b32 s12, s51 ; GFX1132-NEXT: s_mov_b32 s13, s50 @@ -13657,8 +13657,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-NEXT: s_cbranch_execnz .LBB17_4 ; GFX1132-NEXT: .LBB17_5: ; GFX1132-NEXT: s_set_inst_prefetch_distance 0x2 @@ -13667,22 +13667,22 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX7LESS-DPP: ; %bb.0: ; GFX7LESS-DPP-NEXT: s_movk_i32 s32, 0x800 -; GFX7LESS-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX7LESS-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s82, -1 -; GFX7LESS-DPP-NEXT: s_mov_b32 s83, 0xe8f000 -; GFX7LESS-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX7LESS-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 +; GFX7LESS-DPP-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xe8f000 +; GFX7LESS-DPP-NEXT: s_add_u32 s64, s64, s11 +; GFX7LESS-DPP-NEXT: s_addc_u32 s65, s65, 0 ; GFX7LESS-DPP-NEXT: s_mov_b32 s33, s10 ; GFX7LESS-DPP-NEXT: s_mov_b32 s50, s9 ; GFX7LESS-DPP-NEXT: s_mov_b32 s51, s8 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[48:49], s[0:1] -; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[64:65], s[4:5], 0x9 -; GFX7LESS-DPP-NEXT: s_mov_b32 s67, 0xf000 -; GFX7LESS-DPP-NEXT: s_mov_b32 s66, -1 +; GFX7LESS-DPP-NEXT: s_load_dwordx2 s[52:53], s[4:5], 0x9 +; GFX7LESS-DPP-NEXT: s_mov_b32 s55, 0xf000 +; GFX7LESS-DPP-NEXT: s_mov_b32 s54, -1 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] @@ -13700,23 +13700,23 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v40, v0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v41, v1 -; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[64:67], 0 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[52:53], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dwordx2 v[0:1], off, s[52:55], 0 +; GFX7LESS-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX7LESS-DPP-NEXT: .LBB17_1: ; %atomicrmw.start ; GFX7LESS-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7LESS-DPP-NEXT: s_waitcnt vmcnt(0) ; GFX7LESS-DPP-NEXT: v_add_f64 v[2:3], v[0:1], -v[40:41] -; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 offset:4 -; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[80:83], 0 +; GFX7LESS-DPP-NEXT: buffer_store_dword v1, off, s[64:67], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_store_dword v0, off, s[64:67], 0 ; GFX7LESS-DPP-NEXT: s_add_u32 s8, s36, 44 -; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:12 -; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:8 +; GFX7LESS-DPP-NEXT: buffer_store_dword v3, off, s[64:67], 0 offset:12 +; GFX7LESS-DPP-NEXT: buffer_store_dword v2, off, s[64:67], 0 offset:8 ; GFX7LESS-DPP-NEXT: s_addc_u32 s9, s37, 0 ; GFX7LESS-DPP-NEXT: s_getpc_b64 s[0:1] ; GFX7LESS-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 @@ -13730,37 +13730,37 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX7LESS-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX7LESS-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX7LESS-DPP-NEXT: s_mov_b32 s12, s51 ; GFX7LESS-DPP-NEXT: s_mov_b32 s13, s50 ; GFX7LESS-DPP-NEXT: s_mov_b32 s14, s33 ; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v31, v42 -; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX7LESS-DPP-NEXT: s_mov_b64 s[2:3], s[66:67] ; GFX7LESS-DPP-NEXT: s_waitcnt expcnt(0) -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s64 -; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s65 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v2, s52 +; GFX7LESS-DPP-NEXT: v_mov_b32_e32 v3, s53 ; GFX7LESS-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GFX7LESS-DPP-NEXT: v_and_b32_e32 v2, 1, v0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[80:83], 0 -; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 offset:4 +; GFX7LESS-DPP-NEXT: buffer_load_dword v0, off, s[64:67], 0 +; GFX7LESS-DPP-NEXT: buffer_load_dword v1, off, s[64:67], 0 offset:4 ; GFX7LESS-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GFX7LESS-DPP-NEXT: s_or_b64 s[52:53], vcc, s[52:53] -; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[52:53] +; GFX7LESS-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX7LESS-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX7LESS-DPP-NEXT: s_cbranch_execnz .LBB17_1 ; GFX7LESS-DPP-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7LESS-DPP-NEXT: s_endpgm ; ; GFX9-DPP-LABEL: global_atomic_fsub_double_uni_address_div_value_default_scope_strictfp: ; GFX9-DPP: ; %bb.0: -; GFX9-DPP-NEXT: s_mov_b32 s80, SCRATCH_RSRC_DWORD0 -; GFX9-DPP-NEXT: s_mov_b32 s81, SCRATCH_RSRC_DWORD1 -; GFX9-DPP-NEXT: s_mov_b32 s82, -1 -; GFX9-DPP-NEXT: s_mov_b32 s83, 0xe00000 -; GFX9-DPP-NEXT: s_add_u32 s80, s80, s11 -; GFX9-DPP-NEXT: s_addc_u32 s81, s81, 0 +; GFX9-DPP-NEXT: s_mov_b32 s68, SCRATCH_RSRC_DWORD0 +; GFX9-DPP-NEXT: s_mov_b32 s69, SCRATCH_RSRC_DWORD1 +; GFX9-DPP-NEXT: s_mov_b32 s70, -1 +; GFX9-DPP-NEXT: s_mov_b32 s71, 0xe00000 +; GFX9-DPP-NEXT: s_add_u32 s68, s68, s11 +; GFX9-DPP-NEXT: s_addc_u32 s69, s69, 0 ; GFX9-DPP-NEXT: s_mov_b64 s[36:37], s[4:5] ; GFX9-DPP-NEXT: s_mov_b32 s51, s8 ; GFX9-DPP-NEXT: s_add_u32 s8, s36, 44 @@ -13775,17 +13775,17 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-DPP-NEXT: s_mov_b32 s33, s10 ; GFX9-DPP-NEXT: s_mov_b64 s[34:35], s[6:7] -; GFX9-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX9-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: s_movk_i32 s32, 0x800 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -13839,10 +13839,10 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX9-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX9-DPP-NEXT: ; %bb.1: -; GFX9-DPP-NEXT: s_load_dwordx2 s[62:63], s[36:37], 0x24 +; GFX9-DPP-NEXT: s_load_dwordx2 s[54:55], s[36:37], 0x24 ; GFX9-DPP-NEXT: s_mov_b64 s[64:65], 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[62:63] +; GFX9-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[54:55] ; GFX9-DPP-NEXT: .LBB17_2: ; %atomicrmw.start ; GFX9-DPP-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-DPP-NEXT: s_waitcnt vmcnt(0) @@ -13853,31 +13853,31 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX9-DPP-NEXT: s_add_u32 s0, s0, __atomic_compare_exchange@gotpcrel32@lo+4 ; GFX9-DPP-NEXT: s_addc_u32 s1, s1, __atomic_compare_exchange@gotpcrel32@hi+12 ; GFX9-DPP-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x0 -; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[80:81] -; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[80:83], 0 offset:4 -; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[80:83], 0 +; GFX9-DPP-NEXT: s_mov_b64 s[0:1], s[68:69] +; GFX9-DPP-NEXT: buffer_store_dword v2, off, s[68:71], 0 offset:4 +; GFX9-DPP-NEXT: buffer_store_dword v1, off, s[68:71], 0 ; GFX9-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[80:83], 0 offset:12 -; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[80:83], 0 offset:8 -; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-DPP-NEXT: buffer_store_dword v4, off, s[68:71], 0 offset:12 +; GFX9-DPP-NEXT: buffer_store_dword v3, off, s[68:71], 0 offset:8 +; GFX9-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-DPP-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-DPP-NEXT: s_mov_b32 s12, s51 ; GFX9-DPP-NEXT: s_mov_b32 s13, s50 ; GFX9-DPP-NEXT: s_mov_b32 s14, s33 ; GFX9-DPP-NEXT: v_mov_b32_e32 v31, v40 -; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[82:83] +; GFX9-DPP-NEXT: s_mov_b64 s[2:3], s[70:71] ; GFX9-DPP-NEXT: v_mov_b32_e32 v0, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s62 -; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s63 +; GFX9-DPP-NEXT: v_mov_b32_e32 v2, s54 +; GFX9-DPP-NEXT: v_mov_b32_e32 v3, s55 ; GFX9-DPP-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v5, 8 ; GFX9-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-DPP-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[80:83], 0 -; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[80:83], 0 offset:4 +; GFX9-DPP-NEXT: buffer_load_dword v1, off, s[68:71], 0 +; GFX9-DPP-NEXT: buffer_load_dword v2, off, s[68:71], 0 offset:4 ; GFX9-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX9-DPP-NEXT: s_or_b64 s[64:65], vcc, s[64:65] @@ -13908,7 +13908,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1064-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1064-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1064-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1064-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1064-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -13964,7 +13964,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX1064-DPP-NEXT: ; %bb.1: ; GFX1064-DPP-NEXT: s_load_dwordx2 s[52:53], s[34:35], 0x24 -; GFX1064-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1064-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1064-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1064-DPP-NEXT: global_load_dwordx2 v[1:2], v0, s[52:53] ; GFX1064-DPP-NEXT: .LBB17_2: ; %atomicrmw.start @@ -13988,7 +13988,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1064-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1064-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1064-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1064-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1064-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1064-DPP-NEXT: s_mov_b32 s13, s50 @@ -14005,8 +14005,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1064-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1064-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1064-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1064-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[62:63] +; GFX1064-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1064-DPP-NEXT: s_andn2_b64 exec, exec, s[54:55] ; GFX1064-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1064-DPP-NEXT: .LBB17_3: ; GFX1064-DPP-NEXT: s_endpgm @@ -14033,7 +14033,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX1032-DPP-NEXT: s_mov_b32 s33, s10 ; GFX1032-DPP-NEXT: s_mov_b64 s[36:37], s[6:7] -; GFX1032-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1032-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1032-DPP-NEXT: v_or3_b32 v40, v0, v1, v2 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] @@ -14078,7 +14078,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v41, v8 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v0, 0 ; GFX1032-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1032-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1032-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1032-DPP-NEXT: s_and_saveexec_b32 s0, vcc_lo ; GFX1032-DPP-NEXT: s_cbranch_execz .LBB17_3 @@ -14107,7 +14107,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1032-DPP-NEXT: s_mov_b64 s[0:1], s[64:65] ; GFX1032-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1032-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1032-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1032-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1032-DPP-NEXT: s_mov_b32 s13, s50 @@ -14124,8 +14124,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1032-DPP-NEXT: buffer_load_dword v2, off, s[64:67], 0 offset:4 ; GFX1032-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1032-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1032-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s62 +; GFX1032-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1032-DPP-NEXT: s_andn2_b32 exec_lo, exec_lo, s54 ; GFX1032-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1032-DPP-NEXT: .LBB17_3: ; GFX1032-DPP-NEXT: s_endpgm @@ -14153,7 +14153,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_mov_b32 s14, s33 ; GFX1164-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1164-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1164-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1164-DPP-NEXT: s_or_saveexec_b64 s[0:1], -1 @@ -14208,7 +14208,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: s_cbranch_execz .LBB17_3 ; GFX1164-DPP-NEXT: ; %bb.1: ; GFX1164-DPP-NEXT: s_load_b64 s[52:53], s[34:35], 0x24 -; GFX1164-DPP-NEXT: s_mov_b64 s[62:63], 0 +; GFX1164-DPP-NEXT: s_mov_b64 s[54:55], 0 ; GFX1164-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164-DPP-NEXT: global_load_b64 v[1:2], v0, s[52:53] ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x1 @@ -14232,7 +14232,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_mov_b32_e32 v6, 0 ; GFX1164-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1164-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1164-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1164-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1164-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1164-DPP-NEXT: s_mov_b32 s13, s50 @@ -14246,8 +14246,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1164-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1164-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1164-DPP-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; GFX1164-DPP-NEXT: s_or_b64 s[62:63], vcc, s[62:63] -; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[62:63] +; GFX1164-DPP-NEXT: s_or_b64 s[54:55], vcc, s[54:55] +; GFX1164-DPP-NEXT: s_and_not1_b64 exec, exec, s[54:55] ; GFX1164-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1164-DPP-NEXT: .LBB17_3: ; GFX1164-DPP-NEXT: s_set_inst_prefetch_distance 0x2 @@ -14276,7 +14276,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: s_mov_b32 s32, 32 ; GFX1132-DPP-NEXT: s_mov_b32 s33, s15 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v40, v0 -; GFX1132-DPP-NEXT: s_mov_b64 s[46:47], s[2:3] +; GFX1132-DPP-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX1132-DPP-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132-DPP-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX1132-DPP-NEXT: s_or_saveexec_b32 s0, -1 @@ -14318,7 +14318,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1132-DPP-NEXT: v_mov_b32_e32 v42, v9 -; GFX1132-DPP-NEXT: s_mov_b32 s62, 0 +; GFX1132-DPP-NEXT: s_mov_b32 s54, 0 ; GFX1132-DPP-NEXT: s_mov_b32 s0, exec_lo ; GFX1132-DPP-NEXT: v_cmpx_eq_u32_e32 0, v1 ; GFX1132-DPP-NEXT: s_cbranch_execz .LBB17_3 @@ -14344,7 +14344,7 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_dual_mov_b32 v5, 8 :: v_dual_mov_b32 v6, 0 ; GFX1132-DPP-NEXT: v_mov_b32_e32 v7, 0 ; GFX1132-DPP-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX1132-DPP-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX1132-DPP-NEXT: s_mov_b64 s[10:11], s[36:37] ; GFX1132-DPP-NEXT: s_mov_b32 s12, s51 ; GFX1132-DPP-NEXT: s_mov_b32 s13, s50 @@ -14357,8 +14357,8 @@ define amdgpu_kernel void @global_atomic_fsub_double_uni_address_div_value_defau ; GFX1132-DPP-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX1132-DPP-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1132-DPP-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX1132-DPP-NEXT: s_or_b32 s62, vcc_lo, s62 -; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s62 +; GFX1132-DPP-NEXT: s_or_b32 s54, vcc_lo, s54 +; GFX1132-DPP-NEXT: s_and_not1_b32 exec_lo, exec_lo, s54 ; GFX1132-DPP-NEXT: s_cbranch_execnz .LBB17_2 ; GFX1132-DPP-NEXT: .LBB17_3: ; GFX1132-DPP-NEXT: s_set_inst_prefetch_distance 0x2 diff --git a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir index 8ae89ad96a16b..da1175c02e94a 100644 --- a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir @@ -33,13 +33,9 @@ body: | ; CHECK-NEXT: renamable $sgpr33 = COPY $sgpr15 ; CHECK-NEXT: renamable $sgpr50 = COPY $sgpr14 ; CHECK-NEXT: renamable $sgpr36_sgpr37 = COPY $sgpr10_sgpr11 - ; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY $sgpr6_sgpr7 + ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: renamable $sgpr62_sgpr63 = S_LOAD_DWORDX2_IMM renamable $sgpr34_sgpr35, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) - ; CHECK-NEXT: renamable $sgpr64 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr65 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr66 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr67 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr64_sgpr65 = S_LOAD_DWORDX2_IMM renamable $sgpr34_sgpr35, 0, 0 :: (dereferenceable invariant load (s64), align 16, addrspace 4) ; CHECK-NEXT: renamable $sgpr68 = S_MOV_B32 0 ; CHECK-NEXT: renamable $sgpr69 = S_MOV_B32 0 ; CHECK-NEXT: renamable $sgpr70 = S_MOV_B32 0 @@ -57,14 +53,18 @@ body: | ; CHECK-NEXT: renamable $sgpr82 = S_MOV_B32 0 ; CHECK-NEXT: renamable $sgpr83 = S_MOV_B32 0 ; CHECK-NEXT: renamable $sgpr84 = S_MOV_B32 0 - ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr85 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr86 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr87 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr88 = S_MOV_B32 0 + ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: renamable $sgpr52_sgpr53 = IMPLICIT_DEF ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL renamable $sgpr52_sgpr53, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $sgpr4_sgpr5 = COPY killed renamable $sgpr48_sgpr49 - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr46_sgpr47 + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr38_sgpr39 ; CHECK-NEXT: $sgpr8_sgpr9 = COPY killed renamable $sgpr34_sgpr35 ; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr36_sgpr37 ; CHECK-NEXT: $sgpr12 = COPY killed renamable $sgpr50 @@ -76,47 +76,47 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) - ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr62_sgpr63:0x000000000000000F + ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr64_sgpr65:0x000000000000000F ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY $exec, implicit-def $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr62_sgpr63:0x000000000000000F + ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr64_sgpr65:0x000000000000000F ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr63, 1, implicit-def dead $scc + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 + ; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr65, 1, implicit-def dead $scc ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 [[COPY]], 0, killed $sgpr6, 3, implicit-def $m0, implicit $m0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000) - ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr62_sgpr63:0x000000000000000F, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95:0x0000000000000003 + ; CHECK-NEXT: liveins: $sgpr4_sgpr5, $sgpr64_sgpr65:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_OR_SAVEEXEC_B64 renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr43 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr47 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr49 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr50 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr51 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr53 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr54 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr55 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr56 = COPY renamable $sgpr64 - ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr64 + ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr43 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr47 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr49 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr50 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr51 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr53 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr54 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr55 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr56 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr68 ; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec @@ -124,10 +124,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) - ; CHECK-NEXT: liveins: $sgpr6_sgpr7, $sgpr62_sgpr63:0x0000000000000003 + ; CHECK-NEXT: liveins: $sgpr6_sgpr7, $sgpr64_sgpr65:0x0000000000000003 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc - ; CHECK-NEXT: dead renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr62, 1, implicit-def dead $scc + ; CHECK-NEXT: dead renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr64, 1, implicit-def dead $scc ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.0, align 4, addrspace 5) ; CHECK-NEXT: dead [[COPY2:%[0-9]+]]:vreg_1024 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll index 1a8557d25fb92..53c4a9cd229aa 100644 --- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll +++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll @@ -12,17 +12,19 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: v_writelane_b32 v5, s36, 0 ; CHECK-NEXT: v_writelane_b32 v5, s37, 1 -; CHECK-NEXT: v_writelane_b32 v5, s46, 2 -; CHECK-NEXT: v_writelane_b32 v5, s47, 3 +; CHECK-NEXT: v_writelane_b32 v5, s38, 2 +; CHECK-NEXT: v_writelane_b32 v5, s39, 3 ; CHECK-NEXT: v_writelane_b32 v5, s48, 4 ; CHECK-NEXT: v_writelane_b32 v5, s49, 5 ; CHECK-NEXT: v_writelane_b32 v5, s50, 6 ; CHECK-NEXT: v_writelane_b32 v5, s51, 7 -; CHECK-NEXT: s_getpc_b64 s[24:25] ; CHECK-NEXT: v_writelane_b32 v5, s52, 8 +; CHECK-NEXT: v_writelane_b32 v5, s53, 9 +; CHECK-NEXT: s_getpc_b64 s[24:25] +; CHECK-NEXT: v_writelane_b32 v5, s54, 10 ; CHECK-NEXT: s_movk_i32 s4, 0xf0 ; CHECK-NEXT: s_mov_b32 s5, s24 -; CHECK-NEXT: v_writelane_b32 v5, s53, 9 +; CHECK-NEXT: v_writelane_b32 v5, s55, 11 ; CHECK-NEXT: s_load_dwordx16 s[44:59], s[4:5], 0x0 ; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane ; CHECK-NEXT: s_mov_b64 s[4:5], 0 @@ -77,10 +79,10 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_mov_b32 s27, s24 ; CHECK-NEXT: v_writelane_b32 v7, s19, 31 ; CHECK-NEXT: s_load_dwordx8 s[4:11], s[26:27], 0x0 -; CHECK-NEXT: v_writelane_b32 v5, s62, 10 -; CHECK-NEXT: v_writelane_b32 v5, s63, 11 ; CHECK-NEXT: v_writelane_b32 v5, s64, 12 ; CHECK-NEXT: v_writelane_b32 v5, s65, 13 +; CHECK-NEXT: v_writelane_b32 v5, s66, 14 +; CHECK-NEXT: s_movk_i32 s28, 0x1f0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_writelane_b32 v7, s4, 32 ; CHECK-NEXT: v_writelane_b32 v7, s5, 33 @@ -88,16 +90,14 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: v_writelane_b32 v7, s7, 35 ; CHECK-NEXT: v_writelane_b32 v7, s8, 36 ; CHECK-NEXT: v_writelane_b32 v7, s9, 37 -; CHECK-NEXT: v_writelane_b32 v5, s66, 14 -; CHECK-NEXT: s_movk_i32 s28, 0x1f0 -; CHECK-NEXT: s_movk_i32 s70, 0x2f0 +; CHECK-NEXT: s_movk_i32 s72, 0x2f0 ; CHECK-NEXT: s_mov_b32 s29, s24 -; CHECK-NEXT: s_mov_b32 s71, s24 +; CHECK-NEXT: s_mov_b32 s73, s24 ; CHECK-NEXT: v_writelane_b32 v7, s10, 38 ; CHECK-NEXT: v_writelane_b32 v5, s67, 15 ; CHECK-NEXT: v_writelane_b32 v7, s11, 39 ; CHECK-NEXT: s_load_dwordx16 s[52:67], s[28:29], 0x0 -; CHECK-NEXT: s_load_dwordx16 s[4:19], s[70:71], 0x0 +; CHECK-NEXT: s_load_dwordx16 s[4:19], s[72:73], 0x0 ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; CHECK-NEXT: s_xor_b64 s[24:25], vcc, -1 @@ -128,19 +128,19 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: v_readlane_b32 s43, v7, 7 ; CHECK-NEXT: .LBB0_2: ; %bb50 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: v_readlane_b32 s36, v7, 32 -; CHECK-NEXT: v_readlane_b32 s40, v7, 36 -; CHECK-NEXT: v_readlane_b32 s41, v7, 37 -; CHECK-NEXT: v_readlane_b32 s42, v7, 38 -; CHECK-NEXT: v_readlane_b32 s43, v7, 39 +; CHECK-NEXT: v_readlane_b32 s40, v7, 32 +; CHECK-NEXT: v_readlane_b32 s44, v7, 36 +; CHECK-NEXT: v_readlane_b32 s45, v7, 37 +; CHECK-NEXT: v_readlane_b32 s46, v7, 38 +; CHECK-NEXT: v_readlane_b32 s47, v7, 39 ; CHECK-NEXT: s_mov_b32 s21, s20 ; CHECK-NEXT: s_mov_b32 s22, s20 ; CHECK-NEXT: s_mov_b32 s23, s20 -; CHECK-NEXT: v_readlane_b32 s37, v7, 33 -; CHECK-NEXT: v_readlane_b32 s38, v7, 34 +; CHECK-NEXT: v_readlane_b32 s41, v7, 33 +; CHECK-NEXT: v_readlane_b32 s42, v7, 34 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: image_sample_lz v4, v[1:2], s[60:67], s[40:43] dmask:0x1 -; CHECK-NEXT: v_readlane_b32 s39, v7, 35 +; CHECK-NEXT: image_sample_lz v4, v[1:2], s[60:67], s[44:47] dmask:0x1 +; CHECK-NEXT: v_readlane_b32 s43, v7, 35 ; CHECK-NEXT: image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_sub_f32_e32 v1, v1, v4 @@ -320,16 +320,16 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: v_readlane_b32 s66, v5, 14 ; CHECK-NEXT: v_readlane_b32 s65, v5, 13 ; CHECK-NEXT: v_readlane_b32 s64, v5, 12 -; CHECK-NEXT: v_readlane_b32 s63, v5, 11 -; CHECK-NEXT: v_readlane_b32 s62, v5, 10 +; CHECK-NEXT: v_readlane_b32 s55, v5, 11 +; CHECK-NEXT: v_readlane_b32 s54, v5, 10 ; CHECK-NEXT: v_readlane_b32 s53, v5, 9 ; CHECK-NEXT: v_readlane_b32 s52, v5, 8 ; CHECK-NEXT: v_readlane_b32 s51, v5, 7 ; CHECK-NEXT: v_readlane_b32 s50, v5, 6 ; CHECK-NEXT: v_readlane_b32 s49, v5, 5 ; CHECK-NEXT: v_readlane_b32 s48, v5, 4 -; CHECK-NEXT: v_readlane_b32 s47, v5, 3 -; CHECK-NEXT: v_readlane_b32 s46, v5, 2 +; CHECK-NEXT: v_readlane_b32 s39, v5, 3 +; CHECK-NEXT: v_readlane_b32 s38, v5, 2 ; CHECK-NEXT: v_readlane_b32 s37, v5, 1 ; CHECK-NEXT: v_readlane_b32 s36, v5, 0 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll index 8487e195de8e2..d7c4f6afbdade 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -134,16 +134,16 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s46, 6 -; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 ; GCN-NEXT: v_writelane_b32 v40, s48, 8 ; GCN-NEXT: v_writelane_b32 v40, s49, 9 ; GCN-NEXT: v_writelane_b32 v40, s50, 10 ; GCN-NEXT: v_writelane_b32 v40, s51, 11 ; GCN-NEXT: v_writelane_b32 v40, s52, 12 ; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s62, 14 -; GCN-NEXT: v_writelane_b32 v40, s63, 15 +; GCN-NEXT: v_writelane_b32 v40, s54, 14 +; GCN-NEXT: v_writelane_b32 v40, s55, 15 ; GCN-NEXT: v_writelane_b32 v40, s64, 16 ; GCN-NEXT: v_writelane_b32 v40, s65, 17 ; GCN-NEXT: s_mov_b32 s50, s15 @@ -152,16 +152,16 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: s_mov_b32 s53, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[46:47], s[6:7] +; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] ; GCN-NEXT: s_mov_b64 s[48:49], s[4:5] -; GCN-NEXT: s_mov_b64 s[62:63], exec +; GCN-NEXT: s_mov_b64 s[54:55], exec ; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s16, v0 ; GCN-NEXT: v_readfirstlane_b32 s17, v1 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] ; GCN-NEXT: s_and_saveexec_b64 s[64:65], vcc ; GCN-NEXT: s_mov_b64 s[4:5], s[48:49] -; GCN-NEXT: s_mov_b64 s[6:7], s[46:47] +; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] ; GCN-NEXT: s_mov_b32 s12, s53 @@ -174,19 +174,19 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: s_xor_b64 exec, exec, s[64:65] ; GCN-NEXT: s_cbranch_execnz .LBB2_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[62:63] +; GCN-NEXT: s_mov_b64 exec, s[54:55] ; GCN-NEXT: v_readlane_b32 s65, v40, 17 ; GCN-NEXT: v_readlane_b32 s64, v40, 16 -; GCN-NEXT: v_readlane_b32 s63, v40, 15 -; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 15 +; GCN-NEXT: v_readlane_b32 s54, v40, 14 ; GCN-NEXT: v_readlane_b32 s53, v40, 13 ; GCN-NEXT: v_readlane_b32 s52, v40, 12 ; GCN-NEXT: v_readlane_b32 s51, v40, 11 ; GCN-NEXT: v_readlane_b32 s50, v40, 10 ; GCN-NEXT: v_readlane_b32 s49, v40, 9 ; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s47, v40, 7 -; GCN-NEXT: v_readlane_b32 s46, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -218,16 +218,16 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s46, 6 -; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 ; GISEL-NEXT: v_writelane_b32 v40, s48, 8 ; GISEL-NEXT: v_writelane_b32 v40, s49, 9 ; GISEL-NEXT: v_writelane_b32 v40, s50, 10 ; GISEL-NEXT: v_writelane_b32 v40, s51, 11 ; GISEL-NEXT: v_writelane_b32 v40, s52, 12 ; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s62, 14 -; GISEL-NEXT: v_writelane_b32 v40, s63, 15 +; GISEL-NEXT: v_writelane_b32 v40, s54, 14 +; GISEL-NEXT: v_writelane_b32 v40, s55, 15 ; GISEL-NEXT: v_writelane_b32 v40, s64, 16 ; GISEL-NEXT: v_writelane_b32 v40, s65, 17 ; GISEL-NEXT: s_mov_b32 s50, s15 @@ -236,16 +236,16 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: s_mov_b32 s53, s12 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7] +; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5] -; GISEL-NEXT: s_mov_b64 s[62:63], exec +; GISEL-NEXT: s_mov_b64 s[54:55], exec ; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] ; GISEL-NEXT: s_and_saveexec_b64 s[64:65], vcc ; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49] -; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47] +; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] ; GISEL-NEXT: s_mov_b32 s12, s53 @@ -258,19 +258,19 @@ define void @test_indirect_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: s_xor_b64 exec, exec, s[64:65] ; GISEL-NEXT: s_cbranch_execnz .LBB2_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[62:63] +; GISEL-NEXT: s_mov_b64 exec, s[54:55] ; GISEL-NEXT: v_readlane_b32 s65, v40, 17 ; GISEL-NEXT: v_readlane_b32 s64, v40, 16 -; GISEL-NEXT: v_readlane_b32 s63, v40, 15 -; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 15 +; GISEL-NEXT: v_readlane_b32 s54, v40, 14 ; GISEL-NEXT: v_readlane_b32 s53, v40, 13 ; GISEL-NEXT: v_readlane_b32 s52, v40, 12 ; GISEL-NEXT: v_readlane_b32 s51, v40, 11 ; GISEL-NEXT: v_readlane_b32 s50, v40, 10 ; GISEL-NEXT: v_readlane_b32 s49, v40, 9 ; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s47, v40, 7 -; GISEL-NEXT: v_readlane_b32 s46, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 @@ -306,16 +306,16 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s46, 6 -; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 ; GCN-NEXT: v_writelane_b32 v40, s48, 8 ; GCN-NEXT: v_writelane_b32 v40, s49, 9 ; GCN-NEXT: v_writelane_b32 v40, s50, 10 ; GCN-NEXT: v_writelane_b32 v40, s51, 11 ; GCN-NEXT: v_writelane_b32 v40, s52, 12 ; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s62, 14 -; GCN-NEXT: v_writelane_b32 v40, s63, 15 +; GCN-NEXT: v_writelane_b32 v40, s54, 14 +; GCN-NEXT: v_writelane_b32 v40, s55, 15 ; GCN-NEXT: v_writelane_b32 v40, s64, 16 ; GCN-NEXT: v_writelane_b32 v40, s65, 17 ; GCN-NEXT: s_mov_b32 s50, s15 @@ -324,9 +324,9 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GCN-NEXT: s_mov_b32 s53, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[46:47], s[6:7] +; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] ; GCN-NEXT: s_mov_b64 s[48:49], s[4:5] -; GCN-NEXT: s_mov_b64 s[62:63], exec +; GCN-NEXT: s_mov_b64 s[54:55], exec ; GCN-NEXT: v_mov_b32_e32 v2, 0x7b ; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s16, v0 @@ -334,7 +334,7 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] ; GCN-NEXT: s_and_saveexec_b64 s[64:65], vcc ; GCN-NEXT: s_mov_b64 s[4:5], s[48:49] -; GCN-NEXT: s_mov_b64 s[6:7], s[46:47] +; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] ; GCN-NEXT: s_mov_b32 s12, s53 @@ -349,19 +349,19 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GCN-NEXT: s_xor_b64 exec, exec, s[64:65] ; GCN-NEXT: s_cbranch_execnz .LBB3_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[62:63] +; GCN-NEXT: s_mov_b64 exec, s[54:55] ; GCN-NEXT: v_readlane_b32 s65, v40, 17 ; GCN-NEXT: v_readlane_b32 s64, v40, 16 -; GCN-NEXT: v_readlane_b32 s63, v40, 15 -; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 15 +; GCN-NEXT: v_readlane_b32 s54, v40, 14 ; GCN-NEXT: v_readlane_b32 s53, v40, 13 ; GCN-NEXT: v_readlane_b32 s52, v40, 12 ; GCN-NEXT: v_readlane_b32 s51, v40, 11 ; GCN-NEXT: v_readlane_b32 s50, v40, 10 ; GCN-NEXT: v_readlane_b32 s49, v40, 9 ; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s47, v40, 7 -; GCN-NEXT: v_readlane_b32 s46, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -393,16 +393,16 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s46, 6 -; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 ; GISEL-NEXT: v_writelane_b32 v40, s48, 8 ; GISEL-NEXT: v_writelane_b32 v40, s49, 9 ; GISEL-NEXT: v_writelane_b32 v40, s50, 10 ; GISEL-NEXT: v_writelane_b32 v40, s51, 11 ; GISEL-NEXT: v_writelane_b32 v40, s52, 12 ; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s62, 14 -; GISEL-NEXT: v_writelane_b32 v40, s63, 15 +; GISEL-NEXT: v_writelane_b32 v40, s54, 14 +; GISEL-NEXT: v_writelane_b32 v40, s55, 15 ; GISEL-NEXT: v_writelane_b32 v40, s64, 16 ; GISEL-NEXT: v_writelane_b32 v40, s65, 17 ; GISEL-NEXT: s_mov_b32 s50, s15 @@ -411,9 +411,9 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GISEL-NEXT: s_mov_b32 s53, s12 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7] +; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5] -; GISEL-NEXT: s_mov_b64 s[62:63], exec +; GISEL-NEXT: s_mov_b64 s[54:55], exec ; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1 @@ -421,7 +421,7 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GISEL-NEXT: s_and_saveexec_b64 s[64:65], vcc ; GISEL-NEXT: v_mov_b32_e32 v0, 0x7b ; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49] -; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47] +; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] ; GISEL-NEXT: s_mov_b32 s12, s53 @@ -434,19 +434,19 @@ define void @test_indirect_call_vgpr_ptr_arg(ptr %fptr) { ; GISEL-NEXT: s_xor_b64 exec, exec, s[64:65] ; GISEL-NEXT: s_cbranch_execnz .LBB3_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[62:63] +; GISEL-NEXT: s_mov_b64 exec, s[54:55] ; GISEL-NEXT: v_readlane_b32 s65, v40, 17 ; GISEL-NEXT: v_readlane_b32 s64, v40, 16 -; GISEL-NEXT: v_readlane_b32 s63, v40, 15 -; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 15 +; GISEL-NEXT: v_readlane_b32 s54, v40, 14 ; GISEL-NEXT: v_readlane_b32 s53, v40, 13 ; GISEL-NEXT: v_readlane_b32 s52, v40, 12 ; GISEL-NEXT: v_readlane_b32 s51, v40, 11 ; GISEL-NEXT: v_readlane_b32 s50, v40, 10 ; GISEL-NEXT: v_readlane_b32 s49, v40, 9 ; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s47, v40, 7 -; GISEL-NEXT: v_readlane_b32 s46, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 @@ -482,16 +482,16 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s46, 6 -; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 ; GCN-NEXT: v_writelane_b32 v40, s48, 8 ; GCN-NEXT: v_writelane_b32 v40, s49, 9 ; GCN-NEXT: v_writelane_b32 v40, s50, 10 ; GCN-NEXT: v_writelane_b32 v40, s51, 11 ; GCN-NEXT: v_writelane_b32 v40, s52, 12 ; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s62, 14 -; GCN-NEXT: v_writelane_b32 v40, s63, 15 +; GCN-NEXT: v_writelane_b32 v40, s54, 14 +; GCN-NEXT: v_writelane_b32 v40, s55, 15 ; GCN-NEXT: v_writelane_b32 v40, s64, 16 ; GCN-NEXT: v_writelane_b32 v40, s65, 17 ; GCN-NEXT: s_mov_b32 s50, s15 @@ -500,16 +500,16 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GCN-NEXT: s_mov_b32 s53, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[46:47], s[6:7] +; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] ; GCN-NEXT: s_mov_b64 s[48:49], s[4:5] -; GCN-NEXT: s_mov_b64 s[62:63], exec +; GCN-NEXT: s_mov_b64 s[54:55], exec ; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s16, v0 ; GCN-NEXT: v_readfirstlane_b32 s17, v1 ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] ; GCN-NEXT: s_and_saveexec_b64 s[64:65], vcc ; GCN-NEXT: s_mov_b64 s[4:5], s[48:49] -; GCN-NEXT: s_mov_b64 s[6:7], s[46:47] +; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] ; GCN-NEXT: s_mov_b32 s12, s53 @@ -523,20 +523,20 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GCN-NEXT: s_xor_b64 exec, exec, s[64:65] ; GCN-NEXT: s_cbranch_execnz .LBB4_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[62:63] +; GCN-NEXT: s_mov_b64 exec, s[54:55] ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 ; GCN-NEXT: v_readlane_b32 s65, v40, 17 ; GCN-NEXT: v_readlane_b32 s64, v40, 16 -; GCN-NEXT: v_readlane_b32 s63, v40, 15 -; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 15 +; GCN-NEXT: v_readlane_b32 s54, v40, 14 ; GCN-NEXT: v_readlane_b32 s53, v40, 13 ; GCN-NEXT: v_readlane_b32 s52, v40, 12 ; GCN-NEXT: v_readlane_b32 s51, v40, 11 ; GCN-NEXT: v_readlane_b32 s50, v40, 10 ; GCN-NEXT: v_readlane_b32 s49, v40, 9 ; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s47, v40, 7 -; GCN-NEXT: v_readlane_b32 s46, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -568,16 +568,16 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s46, 6 -; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 ; GISEL-NEXT: v_writelane_b32 v40, s48, 8 ; GISEL-NEXT: v_writelane_b32 v40, s49, 9 ; GISEL-NEXT: v_writelane_b32 v40, s50, 10 ; GISEL-NEXT: v_writelane_b32 v40, s51, 11 ; GISEL-NEXT: v_writelane_b32 v40, s52, 12 ; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s62, 14 -; GISEL-NEXT: v_writelane_b32 v40, s63, 15 +; GISEL-NEXT: v_writelane_b32 v40, s54, 14 +; GISEL-NEXT: v_writelane_b32 v40, s55, 15 ; GISEL-NEXT: v_writelane_b32 v40, s64, 16 ; GISEL-NEXT: v_writelane_b32 v40, s65, 17 ; GISEL-NEXT: s_mov_b32 s50, s15 @@ -586,16 +586,16 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GISEL-NEXT: s_mov_b32 s53, s12 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7] +; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5] -; GISEL-NEXT: s_mov_b64 s[62:63], exec +; GISEL-NEXT: s_mov_b64 s[54:55], exec ; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 ; GISEL-NEXT: v_readfirstlane_b32 s17, v1 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] ; GISEL-NEXT: s_and_saveexec_b64 s[64:65], vcc ; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49] -; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47] +; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] ; GISEL-NEXT: s_mov_b32 s12, s53 @@ -609,20 +609,20 @@ define i32 @test_indirect_call_vgpr_ptr_ret(ptr %fptr) { ; GISEL-NEXT: s_xor_b64 exec, exec, s[64:65] ; GISEL-NEXT: s_cbranch_execnz .LBB4_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[62:63] +; GISEL-NEXT: s_mov_b64 exec, s[54:55] ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v1 ; GISEL-NEXT: v_readlane_b32 s65, v40, 17 ; GISEL-NEXT: v_readlane_b32 s64, v40, 16 -; GISEL-NEXT: v_readlane_b32 s63, v40, 15 -; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 15 +; GISEL-NEXT: v_readlane_b32 s54, v40, 14 ; GISEL-NEXT: v_readlane_b32 s53, v40, 13 ; GISEL-NEXT: v_readlane_b32 s52, v40, 12 ; GISEL-NEXT: v_readlane_b32 s51, v40, 11 ; GISEL-NEXT: v_readlane_b32 s50, v40, 10 ; GISEL-NEXT: v_readlane_b32 s49, v40, 9 ; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s47, v40, 7 -; GISEL-NEXT: v_readlane_b32 s46, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 @@ -659,16 +659,16 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s46, 6 -; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 ; GCN-NEXT: v_writelane_b32 v40, s48, 8 ; GCN-NEXT: v_writelane_b32 v40, s49, 9 ; GCN-NEXT: v_writelane_b32 v40, s50, 10 ; GCN-NEXT: v_writelane_b32 v40, s51, 11 ; GCN-NEXT: v_writelane_b32 v40, s52, 12 ; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s62, 14 -; GCN-NEXT: v_writelane_b32 v40, s63, 15 +; GCN-NEXT: v_writelane_b32 v40, s54, 14 +; GCN-NEXT: v_writelane_b32 v40, s55, 15 ; GCN-NEXT: v_writelane_b32 v40, s64, 16 ; GCN-NEXT: v_writelane_b32 v40, s65, 17 ; GCN-NEXT: v_writelane_b32 v40, s66, 18 @@ -679,11 +679,11 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GCN-NEXT: s_mov_b32 s53, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[46:47], s[6:7] +; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] ; GCN-NEXT: s_mov_b64 s[48:49], s[4:5] ; GCN-NEXT: v_and_b32_e32 v2, 1, v2 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 -; GCN-NEXT: s_and_saveexec_b64 s[62:63], vcc +; GCN-NEXT: s_and_saveexec_b64 s[54:55], vcc ; GCN-NEXT: s_cbranch_execz .LBB5_4 ; GCN-NEXT: ; %bb.1: ; %bb1 ; GCN-NEXT: s_mov_b64 s[64:65], exec @@ -693,7 +693,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] ; GCN-NEXT: s_and_saveexec_b64 s[66:67], vcc ; GCN-NEXT: s_mov_b64 s[4:5], s[48:49] -; GCN-NEXT: s_mov_b64 s[6:7], s[46:47] +; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] ; GCN-NEXT: s_mov_b32 s12, s53 @@ -708,21 +708,21 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GCN-NEXT: ; %bb.3: ; GCN-NEXT: s_mov_b64 exec, s[64:65] ; GCN-NEXT: .LBB5_4: ; %bb2 -; GCN-NEXT: s_or_b64 exec, exec, s[62:63] +; GCN-NEXT: s_or_b64 exec, exec, s[54:55] ; GCN-NEXT: v_readlane_b32 s67, v40, 19 ; GCN-NEXT: v_readlane_b32 s66, v40, 18 ; GCN-NEXT: v_readlane_b32 s65, v40, 17 ; GCN-NEXT: v_readlane_b32 s64, v40, 16 -; GCN-NEXT: v_readlane_b32 s63, v40, 15 -; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 15 +; GCN-NEXT: v_readlane_b32 s54, v40, 14 ; GCN-NEXT: v_readlane_b32 s53, v40, 13 ; GCN-NEXT: v_readlane_b32 s52, v40, 12 ; GCN-NEXT: v_readlane_b32 s51, v40, 11 ; GCN-NEXT: v_readlane_b32 s50, v40, 10 ; GCN-NEXT: v_readlane_b32 s49, v40, 9 ; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s47, v40, 7 -; GCN-NEXT: v_readlane_b32 s46, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -754,16 +754,16 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s46, 6 -; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 ; GISEL-NEXT: v_writelane_b32 v40, s48, 8 ; GISEL-NEXT: v_writelane_b32 v40, s49, 9 ; GISEL-NEXT: v_writelane_b32 v40, s50, 10 ; GISEL-NEXT: v_writelane_b32 v40, s51, 11 ; GISEL-NEXT: v_writelane_b32 v40, s52, 12 ; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s62, 14 -; GISEL-NEXT: v_writelane_b32 v40, s63, 15 +; GISEL-NEXT: v_writelane_b32 v40, s54, 14 +; GISEL-NEXT: v_writelane_b32 v40, s55, 15 ; GISEL-NEXT: v_writelane_b32 v40, s64, 16 ; GISEL-NEXT: v_writelane_b32 v40, s65, 17 ; GISEL-NEXT: v_writelane_b32 v40, s66, 18 @@ -774,11 +774,11 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GISEL-NEXT: s_mov_b32 s53, s12 ; GISEL-NEXT: s_mov_b64 s[34:35], s[10:11] ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] -; GISEL-NEXT: s_mov_b64 s[46:47], s[6:7] +; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[48:49], s[4:5] ; GISEL-NEXT: v_and_b32_e32 v2, 1, v2 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; GISEL-NEXT: s_and_saveexec_b64 s[62:63], vcc +; GISEL-NEXT: s_and_saveexec_b64 s[54:55], vcc ; GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GISEL-NEXT: ; %bb.1: ; %bb1 ; GISEL-NEXT: s_mov_b64 s[64:65], exec @@ -788,7 +788,7 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] ; GISEL-NEXT: s_and_saveexec_b64 s[66:67], vcc ; GISEL-NEXT: s_mov_b64 s[4:5], s[48:49] -; GISEL-NEXT: s_mov_b64 s[6:7], s[46:47] +; GISEL-NEXT: s_mov_b64 s[6:7], s[38:39] ; GISEL-NEXT: s_mov_b64 s[8:9], s[36:37] ; GISEL-NEXT: s_mov_b64 s[10:11], s[34:35] ; GISEL-NEXT: s_mov_b32 s12, s53 @@ -803,21 +803,21 @@ define void @test_indirect_call_vgpr_ptr_in_branch(ptr %fptr, i1 %cond) { ; GISEL-NEXT: ; %bb.3: ; GISEL-NEXT: s_mov_b64 exec, s[64:65] ; GISEL-NEXT: .LBB5_4: ; %bb2 -; GISEL-NEXT: s_or_b64 exec, exec, s[62:63] +; GISEL-NEXT: s_or_b64 exec, exec, s[54:55] ; GISEL-NEXT: v_readlane_b32 s67, v40, 19 ; GISEL-NEXT: v_readlane_b32 s66, v40, 18 ; GISEL-NEXT: v_readlane_b32 s65, v40, 17 ; GISEL-NEXT: v_readlane_b32 s64, v40, 16 -; GISEL-NEXT: v_readlane_b32 s63, v40, 15 -; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 15 +; GISEL-NEXT: v_readlane_b32 s54, v40, 14 ; GISEL-NEXT: v_readlane_b32 s53, v40, 13 ; GISEL-NEXT: v_readlane_b32 s52, v40, 12 ; GISEL-NEXT: v_readlane_b32 s51, v40, 11 ; GISEL-NEXT: v_readlane_b32 s50, v40, 10 ; GISEL-NEXT: v_readlane_b32 s49, v40, 9 ; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s47, v40, 7 -; GISEL-NEXT: v_readlane_b32 s46, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 @@ -859,16 +859,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s46, 6 -; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 ; GCN-NEXT: v_writelane_b32 v40, s48, 8 ; GCN-NEXT: v_writelane_b32 v40, s49, 9 ; GCN-NEXT: v_writelane_b32 v40, s50, 10 ; GCN-NEXT: v_writelane_b32 v40, s51, 11 ; GCN-NEXT: v_writelane_b32 v40, s52, 12 ; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s62, 14 -; GCN-NEXT: v_writelane_b32 v40, s63, 15 +; GCN-NEXT: v_writelane_b32 v40, s54, 14 +; GCN-NEXT: v_writelane_b32 v40, s55, 15 ; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s8, v0 @@ -882,16 +882,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB6_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_readlane_b32 s63, v40, 15 -; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 15 +; GCN-NEXT: v_readlane_b32 s54, v40, 14 ; GCN-NEXT: v_readlane_b32 s53, v40, 13 ; GCN-NEXT: v_readlane_b32 s52, v40, 12 ; GCN-NEXT: v_readlane_b32 s51, v40, 11 ; GCN-NEXT: v_readlane_b32 s50, v40, 10 ; GCN-NEXT: v_readlane_b32 s49, v40, 9 ; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s47, v40, 7 -; GCN-NEXT: v_readlane_b32 s46, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -921,16 +921,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s46, 6 -; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 ; GISEL-NEXT: v_writelane_b32 v40, s48, 8 ; GISEL-NEXT: v_writelane_b32 v40, s49, 9 ; GISEL-NEXT: v_writelane_b32 v40, s50, 10 ; GISEL-NEXT: v_writelane_b32 v40, s51, 11 ; GISEL-NEXT: v_writelane_b32 v40, s52, 12 ; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s62, 14 -; GISEL-NEXT: v_writelane_b32 v40, s63, 15 +; GISEL-NEXT: v_writelane_b32 v40, s54, 14 +; GISEL-NEXT: v_writelane_b32 v40, s55, 15 ; GISEL-NEXT: s_mov_b64 s[6:7], exec ; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s8, v0 @@ -944,16 +944,16 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(ptr %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB6_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: v_readlane_b32 s63, v40, 15 -; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 15 +; GISEL-NEXT: v_readlane_b32 s54, v40, 14 ; GISEL-NEXT: v_readlane_b32 s53, v40, 13 ; GISEL-NEXT: v_readlane_b32 s52, v40, 12 ; GISEL-NEXT: v_readlane_b32 s51, v40, 11 ; GISEL-NEXT: v_readlane_b32 s50, v40, 10 ; GISEL-NEXT: v_readlane_b32 s49, v40, 9 ; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s47, v40, 7 -; GISEL-NEXT: v_readlane_b32 s46, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 @@ -988,16 +988,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v41, s35, 3 ; GCN-NEXT: v_writelane_b32 v41, s36, 4 ; GCN-NEXT: v_writelane_b32 v41, s37, 5 -; GCN-NEXT: v_writelane_b32 v41, s46, 6 -; GCN-NEXT: v_writelane_b32 v41, s47, 7 +; GCN-NEXT: v_writelane_b32 v41, s38, 6 +; GCN-NEXT: v_writelane_b32 v41, s39, 7 ; GCN-NEXT: v_writelane_b32 v41, s48, 8 ; GCN-NEXT: v_writelane_b32 v41, s49, 9 ; GCN-NEXT: v_writelane_b32 v41, s50, 10 ; GCN-NEXT: v_writelane_b32 v41, s51, 11 ; GCN-NEXT: v_writelane_b32 v41, s52, 12 ; GCN-NEXT: v_writelane_b32 v41, s53, 13 -; GCN-NEXT: v_writelane_b32 v41, s62, 14 -; GCN-NEXT: v_writelane_b32 v41, s63, 15 +; GCN-NEXT: v_writelane_b32 v41, s54, 14 +; GCN-NEXT: v_writelane_b32 v41, s55, 15 ; GCN-NEXT: v_mov_b32_e32 v40, v0 ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 @@ -1013,16 +1013,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v40 -; GCN-NEXT: v_readlane_b32 s63, v41, 15 -; GCN-NEXT: v_readlane_b32 s62, v41, 14 +; GCN-NEXT: v_readlane_b32 s55, v41, 15 +; GCN-NEXT: v_readlane_b32 s54, v41, 14 ; GCN-NEXT: v_readlane_b32 s53, v41, 13 ; GCN-NEXT: v_readlane_b32 s52, v41, 12 ; GCN-NEXT: v_readlane_b32 s51, v41, 11 ; GCN-NEXT: v_readlane_b32 s50, v41, 10 ; GCN-NEXT: v_readlane_b32 s49, v41, 9 ; GCN-NEXT: v_readlane_b32 s48, v41, 8 -; GCN-NEXT: v_readlane_b32 s47, v41, 7 -; GCN-NEXT: v_readlane_b32 s46, v41, 6 +; GCN-NEXT: v_readlane_b32 s39, v41, 7 +; GCN-NEXT: v_readlane_b32 s38, v41, 6 ; GCN-NEXT: v_readlane_b32 s37, v41, 5 ; GCN-NEXT: v_readlane_b32 s36, v41, 4 ; GCN-NEXT: v_readlane_b32 s35, v41, 3 @@ -1054,16 +1054,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v41, s35, 3 ; GISEL-NEXT: v_writelane_b32 v41, s36, 4 ; GISEL-NEXT: v_writelane_b32 v41, s37, 5 -; GISEL-NEXT: v_writelane_b32 v41, s46, 6 -; GISEL-NEXT: v_writelane_b32 v41, s47, 7 +; GISEL-NEXT: v_writelane_b32 v41, s38, 6 +; GISEL-NEXT: v_writelane_b32 v41, s39, 7 ; GISEL-NEXT: v_writelane_b32 v41, s48, 8 ; GISEL-NEXT: v_writelane_b32 v41, s49, 9 ; GISEL-NEXT: v_writelane_b32 v41, s50, 10 ; GISEL-NEXT: v_writelane_b32 v41, s51, 11 ; GISEL-NEXT: v_writelane_b32 v41, s52, 12 ; GISEL-NEXT: v_writelane_b32 v41, s53, 13 -; GISEL-NEXT: v_writelane_b32 v41, s62, 14 -; GISEL-NEXT: v_writelane_b32 v41, s63, 15 +; GISEL-NEXT: v_writelane_b32 v41, s54, 14 +; GISEL-NEXT: v_writelane_b32 v41, s55, 15 ; GISEL-NEXT: v_mov_b32_e32 v40, v0 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 @@ -1079,16 +1079,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) { ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v40 -; GISEL-NEXT: v_readlane_b32 s63, v41, 15 -; GISEL-NEXT: v_readlane_b32 s62, v41, 14 +; GISEL-NEXT: v_readlane_b32 s55, v41, 15 +; GISEL-NEXT: v_readlane_b32 s54, v41, 14 ; GISEL-NEXT: v_readlane_b32 s53, v41, 13 ; GISEL-NEXT: v_readlane_b32 s52, v41, 12 ; GISEL-NEXT: v_readlane_b32 s51, v41, 11 ; GISEL-NEXT: v_readlane_b32 s50, v41, 10 ; GISEL-NEXT: v_readlane_b32 s49, v41, 9 ; GISEL-NEXT: v_readlane_b32 s48, v41, 8 -; GISEL-NEXT: v_readlane_b32 s47, v41, 7 -; GISEL-NEXT: v_readlane_b32 s46, v41, 6 +; GISEL-NEXT: v_readlane_b32 s39, v41, 7 +; GISEL-NEXT: v_readlane_b32 s38, v41, 6 ; GISEL-NEXT: v_readlane_b32 s37, v41, 5 ; GISEL-NEXT: v_readlane_b32 s36, v41, 4 ; GISEL-NEXT: v_readlane_b32 s35, v41, 3 @@ -1127,16 +1127,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s46, 6 -; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 ; GCN-NEXT: v_writelane_b32 v40, s48, 8 ; GCN-NEXT: v_writelane_b32 v40, s49, 9 ; GCN-NEXT: v_writelane_b32 v40, s50, 10 ; GCN-NEXT: v_writelane_b32 v40, s51, 11 ; GCN-NEXT: v_writelane_b32 v40, s52, 12 ; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s62, 14 -; GCN-NEXT: v_writelane_b32 v40, s63, 15 +; GCN-NEXT: v_writelane_b32 v40, s54, 14 +; GCN-NEXT: v_writelane_b32 v40, s55, 15 ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s8, v1 @@ -1152,16 +1152,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v3 -; GCN-NEXT: v_readlane_b32 s63, v40, 15 -; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 15 +; GCN-NEXT: v_readlane_b32 s54, v40, 14 ; GCN-NEXT: v_readlane_b32 s53, v40, 13 ; GCN-NEXT: v_readlane_b32 s52, v40, 12 ; GCN-NEXT: v_readlane_b32 s51, v40, 11 ; GCN-NEXT: v_readlane_b32 s50, v40, 10 ; GCN-NEXT: v_readlane_b32 s49, v40, 9 ; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s47, v40, 7 -; GCN-NEXT: v_readlane_b32 s46, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -1191,16 +1191,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s46, 6 -; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 ; GISEL-NEXT: v_writelane_b32 v40, s48, 8 ; GISEL-NEXT: v_writelane_b32 v40, s49, 9 ; GISEL-NEXT: v_writelane_b32 v40, s50, 10 ; GISEL-NEXT: v_writelane_b32 v40, s51, 11 ; GISEL-NEXT: v_writelane_b32 v40, s52, 12 ; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s62, 14 -; GISEL-NEXT: v_writelane_b32 v40, s63, 15 +; GISEL-NEXT: v_writelane_b32 v40, s54, 14 +; GISEL-NEXT: v_writelane_b32 v40, s55, 15 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s8, v1 @@ -1216,16 +1216,16 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, ptr %fptr) { ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v2 -; GISEL-NEXT: v_readlane_b32 s63, v40, 15 -; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 15 +; GISEL-NEXT: v_readlane_b32 s54, v40, 14 ; GISEL-NEXT: v_readlane_b32 s53, v40, 13 ; GISEL-NEXT: v_readlane_b32 s52, v40, 12 ; GISEL-NEXT: v_readlane_b32 s51, v40, 11 ; GISEL-NEXT: v_readlane_b32 s50, v40, 10 ; GISEL-NEXT: v_readlane_b32 s49, v40, 9 ; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s47, v40, 7 -; GISEL-NEXT: v_readlane_b32 s46, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 @@ -1260,16 +1260,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s35, 3 ; GCN-NEXT: v_writelane_b32 v40, s36, 4 ; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s46, 6 -; GCN-NEXT: v_writelane_b32 v40, s47, 7 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 ; GCN-NEXT: v_writelane_b32 v40, s48, 8 ; GCN-NEXT: v_writelane_b32 v40, s49, 9 ; GCN-NEXT: v_writelane_b32 v40, s50, 10 ; GCN-NEXT: v_writelane_b32 v40, s51, 11 ; GCN-NEXT: v_writelane_b32 v40, s52, 12 ; GCN-NEXT: v_writelane_b32 v40, s53, 13 -; GCN-NEXT: v_writelane_b32 v40, s62, 14 -; GCN-NEXT: v_writelane_b32 v40, s63, 15 +; GCN-NEXT: v_writelane_b32 v40, s54, 14 +; GCN-NEXT: v_writelane_b32 v40, s55, 15 ; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s6, v0 @@ -1282,16 +1282,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB9_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_readlane_b32 s63, v40, 15 -; GCN-NEXT: v_readlane_b32 s62, v40, 14 +; GCN-NEXT: v_readlane_b32 s55, v40, 15 +; GCN-NEXT: v_readlane_b32 s54, v40, 14 ; GCN-NEXT: v_readlane_b32 s53, v40, 13 ; GCN-NEXT: v_readlane_b32 s52, v40, 12 ; GCN-NEXT: v_readlane_b32 s51, v40, 11 ; GCN-NEXT: v_readlane_b32 s50, v40, 10 ; GCN-NEXT: v_readlane_b32 s49, v40, 9 ; GCN-NEXT: v_readlane_b32 s48, v40, 8 -; GCN-NEXT: v_readlane_b32 s47, v40, 7 -; GCN-NEXT: v_readlane_b32 s46, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 ; GCN-NEXT: v_readlane_b32 s37, v40, 5 ; GCN-NEXT: v_readlane_b32 s36, v40, 4 ; GCN-NEXT: v_readlane_b32 s35, v40, 3 @@ -1321,16 +1321,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s35, 3 ; GISEL-NEXT: v_writelane_b32 v40, s36, 4 ; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s46, 6 -; GISEL-NEXT: v_writelane_b32 v40, s47, 7 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 ; GISEL-NEXT: v_writelane_b32 v40, s48, 8 ; GISEL-NEXT: v_writelane_b32 v40, s49, 9 ; GISEL-NEXT: v_writelane_b32 v40, s50, 10 ; GISEL-NEXT: v_writelane_b32 v40, s51, 11 ; GISEL-NEXT: v_writelane_b32 v40, s52, 12 ; GISEL-NEXT: v_writelane_b32 v40, s53, 13 -; GISEL-NEXT: v_writelane_b32 v40, s62, 14 -; GISEL-NEXT: v_writelane_b32 v40, s63, 15 +; GISEL-NEXT: v_writelane_b32 v40, s54, 14 +; GISEL-NEXT: v_writelane_b32 v40, s55, 15 ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s6, v0 @@ -1343,16 +1343,16 @@ define void @test_indirect_tail_call_vgpr_ptr(ptr %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB9_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_readlane_b32 s63, v40, 15 -; GISEL-NEXT: v_readlane_b32 s62, v40, 14 +; GISEL-NEXT: v_readlane_b32 s55, v40, 15 +; GISEL-NEXT: v_readlane_b32 s54, v40, 14 ; GISEL-NEXT: v_readlane_b32 s53, v40, 13 ; GISEL-NEXT: v_readlane_b32 s52, v40, 12 ; GISEL-NEXT: v_readlane_b32 s51, v40, 11 ; GISEL-NEXT: v_readlane_b32 s50, v40, 10 ; GISEL-NEXT: v_readlane_b32 s49, v40, 9 ; GISEL-NEXT: v_readlane_b32 s48, v40, 8 -; GISEL-NEXT: v_readlane_b32 s47, v40, 7 -; GISEL-NEXT: v_readlane_b32 s46, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 ; GISEL-NEXT: v_readlane_b32 s37, v40, 5 ; GISEL-NEXT: v_readlane_b32 s36, v40, 4 ; GISEL-NEXT: v_readlane_b32 s35, v40, 3 diff --git a/llvm/test/CodeGen/AMDGPU/issue48473.mir b/llvm/test/CodeGen/AMDGPU/issue48473.mir index 55de5dd133700..b447272702641 100644 --- a/llvm/test/CodeGen/AMDGPU/issue48473.mir +++ b/llvm/test/CodeGen/AMDGPU/issue48473.mir @@ -43,7 +43,7 @@ # %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 # CHECK-LABEL: name: issue48473 -# CHECK: S_NOP 0, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 +# CHECK: S_NOP 0, implicit killed renamable $sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, implicit killed renamable $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, implicit killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55, implicit renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 --- name: issue48473 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll index 4fd9fc95b8532..dbe95a8091932 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll @@ -113,20 +113,20 @@ exit: define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) { ; GFX9-SDAG-LABEL: test_call: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-SDAG-NEXT: s_mov_b32 s50, -1 -; GFX9-SDAG-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-SDAG-NEXT: s_add_u32 s48, s48, s2 -; GFX9-SDAG-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-SDAG-NEXT: s_mov_b32 s38, -1 +; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s2 +; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-SDAG-NEXT: s_getpc_b64 s[0:1] ; GFX9-SDAG-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4 ; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9-SDAG-NEXT: s_mov_b32 s6, src_pops_exiting_wave_id -; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-SDAG-NEXT: s_mov_b64 s[8:9], 36 -; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6 ; GFX9-SDAG-NEXT: s_mov_b32 s32, 0 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -135,20 +135,20 @@ define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) { ; ; GFX9-GISEL-LABEL: test_call: ; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-GISEL-NEXT: s_mov_b32 s50, -1 -; GFX9-GISEL-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-GISEL-NEXT: s_add_u32 s48, s48, s2 -; GFX9-GISEL-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-GISEL-NEXT: s_mov_b32 s38, -1 +; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s2 +; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1] ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 ; GFX9-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-GISEL-NEXT: s_mov_b64 s[8:9], 36 ; GFX9-GISEL-NEXT: s_mov_b32 s32, 0 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -157,12 +157,12 @@ define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) { ; ; GFX10-LABEL: test_call: ; GFX10: ; %bb.0: -; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s50, -1 -; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX10-NEXT: s_add_u32 s48, s48, s2 -; GFX10-NEXT: s_addc_u32 s49, s49, 0 +; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s38, -1 +; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX10-NEXT: s_add_u32 s36, s36, s2 +; GFX10-NEXT: s_addc_u32 s37, s37, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, foo@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, foo@gotpcrel32@hi+12 @@ -171,8 +171,8 @@ define amdgpu_ps void @test_call(ptr addrspace(1) inreg %ptr) { ; GFX10-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX10-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll index c6a412a9f88b0..8b1ba393c8de8 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll @@ -1738,7 +1738,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX7-NEXT: v_max_f32_e32 v19, v0, v16 ; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 ; GFX7-NEXT: v_max_f32_e32 v16, v14, v30 -; GFX7-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30 +; GFX7-NEXT: v_cmp_o_f32_e64 s[40:41], v14, v30 ; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 ; GFX7-NEXT: v_max_f32_e32 v4, v4, v20 ; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 @@ -1760,7 +1760,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 ; GFX7-NEXT: v_max_f32_e32 v13, v13, v29 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39] +; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[40:41] ; GFX7-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] ; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] @@ -1794,7 +1794,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX8-NEXT: v_max_f32_e32 v19, v0, v16 ; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 ; GFX8-NEXT: v_max_f32_e32 v16, v14, v30 -; GFX8-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30 +; GFX8-NEXT: v_cmp_o_f32_e64 s[40:41], v14, v30 ; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 ; GFX8-NEXT: v_max_f32_e32 v4, v4, v20 ; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 @@ -1816,7 +1816,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 ; GFX8-NEXT: v_max_f32_e32 v13, v13, v29 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39] +; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[40:41] ; GFX8-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] ; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] @@ -1850,7 +1850,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX900-NEXT: v_max_f32_e32 v19, v0, v16 ; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 ; GFX900-NEXT: v_max_f32_e32 v16, v14, v30 -; GFX900-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30 +; GFX900-NEXT: v_cmp_o_f32_e64 s[40:41], v14, v30 ; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 ; GFX900-NEXT: v_max_f32_e32 v4, v4, v20 ; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 @@ -1872,7 +1872,7 @@ define <16 x float> @v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 ; GFX900-NEXT: v_max_f32_e32 v13, v13, v29 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39] +; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[40:41] ; GFX900-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] ; GFX900-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] ; GFX900-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll index f7ce72efa4373..3344c73f9eb6f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll @@ -2095,14 +2095,14 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32] +; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32] ; GFX7-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39] +; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33] +; GFX7-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33] ; GFX7-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33] ; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc @@ -2119,9 +2119,9 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] ; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] ; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39] -; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41] -; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41] +; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41] +; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43] +; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_maximum_v16f64: @@ -2214,14 +2214,14 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32] +; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32] ; GFX8-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39] +; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33] +; GFX8-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33] ; GFX8-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33] ; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc @@ -2238,9 +2238,9 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] ; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] ; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39] -; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41] -; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43] +; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_maximum_v16f64: @@ -2333,14 +2333,14 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32] +; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32] ; GFX900-NEXT: v_max_f64 v[28:29], v[28:29], v[31:32] ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39] +; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33] +; GFX900-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33] ; GFX900-NEXT: v_max_f64 v[30:31], v[30:31], v[32:33] ; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc @@ -2357,9 +2357,9 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] ; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] ; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39] -; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41] -; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41] +; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41] +; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43] +; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_maximum_v16f64: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll index 7fe4f9be2727d..7b2998cbd242f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll @@ -1738,7 +1738,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX7-NEXT: v_min_f32_e32 v19, v0, v16 ; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 ; GFX7-NEXT: v_min_f32_e32 v16, v14, v30 -; GFX7-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30 +; GFX7-NEXT: v_cmp_o_f32_e64 s[40:41], v14, v30 ; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 ; GFX7-NEXT: v_min_f32_e32 v4, v4, v20 ; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 @@ -1760,7 +1760,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 ; GFX7-NEXT: v_min_f32_e32 v13, v13, v29 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc -; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39] +; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[40:41] ; GFX7-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] ; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] @@ -1794,7 +1794,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX8-NEXT: v_min_f32_e32 v19, v0, v16 ; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 ; GFX8-NEXT: v_min_f32_e32 v16, v14, v30 -; GFX8-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30 +; GFX8-NEXT: v_cmp_o_f32_e64 s[40:41], v14, v30 ; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 ; GFX8-NEXT: v_min_f32_e32 v4, v4, v20 ; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 @@ -1816,7 +1816,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 ; GFX8-NEXT: v_min_f32_e32 v13, v13, v29 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc -; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39] +; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[40:41] ; GFX8-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] ; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] @@ -1850,7 +1850,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX900-NEXT: v_min_f32_e32 v19, v0, v16 ; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 ; GFX900-NEXT: v_min_f32_e32 v16, v14, v30 -; GFX900-NEXT: v_cmp_o_f32_e64 s[38:39], v14, v30 +; GFX900-NEXT: v_cmp_o_f32_e64 s[40:41], v14, v30 ; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 ; GFX900-NEXT: v_min_f32_e32 v4, v4, v20 ; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 @@ -1872,7 +1872,7 @@ define <16 x float> @v_minimum_v16f32(<16 x float> %src0, <16 x float> %src1) { ; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 ; GFX900-NEXT: v_min_f32_e32 v13, v13, v29 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc -; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[38:39] +; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[40:41] ; GFX900-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] ; GFX900-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] ; GFX900-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll index ab20fd88091d9..1d1673315f6ff 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll @@ -2095,14 +2095,14 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; GFX7-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32] +; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32] ; GFX7-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32] ; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX7-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX7-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39] +; GFX7-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33] +; GFX7-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33] ; GFX7-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33] ; GFX7-NEXT: v_mov_b32_e32 v32, 0x7ff80000 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc @@ -2119,9 +2119,9 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX7-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] ; GFX7-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] ; GFX7-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39] -; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41] -; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41] +; GFX7-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41] +; GFX7-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43] +; GFX7-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43] ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_minimum_v16f64: @@ -2214,14 +2214,14 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; GFX8-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32] +; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32] ; GFX8-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32] ; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX8-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX8-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39] +; GFX8-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33] +; GFX8-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33] ; GFX8-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33] ; GFX8-NEXT: v_mov_b32_e32 v32, 0x7ff80000 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc @@ -2238,9 +2238,9 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX8-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] ; GFX8-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] ; GFX8-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39] -; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41] -; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41] +; GFX8-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43] +; GFX8-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43] ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_minimum_v16f64: @@ -2333,14 +2333,14 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:116 ; GFX900-NEXT: v_cndmask_b32_e64 v26, v26, 0, s[28:29] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[38:39], v[28:29], v[31:32] +; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[28:29], v[31:32] ; GFX900-NEXT: v_min_f64 v[28:29], v[28:29], v[31:32] ; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 ; GFX900-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:128 ; GFX900-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:124 -; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[38:39] +; GFX900-NEXT: v_cndmask_b32_e64 v28, v28, 0, s[40:41] ; GFX900-NEXT: s_waitcnt vmcnt(0) -; GFX900-NEXT: v_cmp_u_f64_e64 s[40:41], v[30:31], v[32:33] +; GFX900-NEXT: v_cmp_u_f64_e64 s[42:43], v[30:31], v[32:33] ; GFX900-NEXT: v_min_f64 v[30:31], v[30:31], v[32:33] ; GFX900-NEXT: v_mov_b32_e32 v32, 0x7ff80000 ; GFX900-NEXT: v_cndmask_b32_e32 v1, v1, v32, vcc @@ -2357,9 +2357,9 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1) ; GFX900-NEXT: v_cndmask_b32_e64 v23, v23, v32, s[24:25] ; GFX900-NEXT: v_cndmask_b32_e64 v25, v25, v32, s[26:27] ; GFX900-NEXT: v_cndmask_b32_e64 v27, v27, v32, s[28:29] -; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[38:39] -; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[40:41] -; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[40:41] +; GFX900-NEXT: v_cndmask_b32_e64 v29, v29, v32, s[40:41] +; GFX900-NEXT: v_cndmask_b32_e64 v31, v31, v32, s[42:43] +; GFX900-NEXT: v_cndmask_b32_e64 v30, v30, 0, s[42:43] ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX950-LABEL: v_minimum_v16f64: diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll index 55e1c3842aa6f..989ef6f981d9d 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll @@ -68,12 +68,12 @@ define amdgpu_kernel void @workgroup_ids_kernel() { define amdgpu_kernel void @caller() { ; GFX9-SDAG-LABEL: caller: ; GFX9-SDAG: ; %bb.0: -; GFX9-SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-SDAG-NEXT: s_mov_b32 s50, -1 -; GFX9-SDAG-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-SDAG-NEXT: s_add_u32 s48, s48, s11 -; GFX9-SDAG-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-SDAG-NEXT: s_mov_b32 s38, -1 +; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s11 +; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-SDAG-NEXT: s_mov_b32 s12, s8 ; GFX9-SDAG-NEXT: s_add_u32 s8, s4, 36 ; GFX9-SDAG-NEXT: s_addc_u32 s9, s5, 0 @@ -86,9 +86,9 @@ define amdgpu_kernel void @caller() { ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s12 ; GFX9-SDAG-NEXT: s_mov_b32 s32, 0 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -97,12 +97,12 @@ define amdgpu_kernel void @caller() { ; ; GFX9-GISEL-LABEL: caller: ; GFX9-GISEL: ; %bb.0: -; GFX9-GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-GISEL-NEXT: s_mov_b32 s50, -1 -; GFX9-GISEL-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-GISEL-NEXT: s_add_u32 s48, s48, s11 -; GFX9-GISEL-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-GISEL-NEXT: s_mov_b32 s38, -1 +; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s11 +; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-GISEL-NEXT: s_mov_b32 s14, s8 ; GFX9-GISEL-NEXT: s_add_u32 s8, s4, 36 ; GFX9-GISEL-NEXT: s_addc_u32 s9, s5, 0 @@ -115,10 +115,10 @@ define amdgpu_kernel void @caller() { ; GFX9-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s14 -; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], s[12:13] ; GFX9-GISEL-NEXT: s_mov_b32 s12, s14 ; GFX9-GISEL-NEXT: s_mov_b32 s32, 0 @@ -128,12 +128,12 @@ define amdgpu_kernel void @caller() { ; ; GFX9ARCH-SDAG-LABEL: caller: ; GFX9ARCH-SDAG: ; %bb.0: -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s50, -1 -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9ARCH-SDAG-NEXT: s_add_u32 s48, s48, s8 -; GFX9ARCH-SDAG-NEXT: s_addc_u32 s49, s49, 0 +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s38, -1 +; GFX9ARCH-SDAG-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9ARCH-SDAG-NEXT: s_add_u32 s36, s36, s8 +; GFX9ARCH-SDAG-NEXT: s_addc_u32 s37, s37, 0 ; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s4, 36 ; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s5, 0 ; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[4:5] @@ -145,9 +145,9 @@ define amdgpu_kernel void @caller() { ; GFX9ARCH-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9ARCH-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2 -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 ; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0 ; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -156,12 +156,12 @@ define amdgpu_kernel void @caller() { ; ; GFX9ARCH-GISEL-LABEL: caller: ; GFX9ARCH-GISEL: ; %bb.0: -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s50, -1 -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9ARCH-GISEL-NEXT: s_add_u32 s48, s48, s8 -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s49, s49, 0 +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s38, -1 +; GFX9ARCH-GISEL-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9ARCH-GISEL-NEXT: s_add_u32 s36, s36, s8 +; GFX9ARCH-GISEL-NEXT: s_addc_u32 s37, s37, 0 ; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s4, 36 ; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s5, 0 ; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[12:13], s[0:1] @@ -173,10 +173,10 @@ define amdgpu_kernel void @caller() { ; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 ; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX9ARCH-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2 ; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[4:5], s[12:13] ; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0 ; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll index d29e6f8c3d2c6..4fb6a0114b499 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll @@ -43,7 +43,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_addc_u32 s13, s13, 0 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 -; CHECK-NEXT: s_load_dwordx8 s[96:103], s[8:9], 0x0 +; CHECK-NEXT: s_load_dwordx8 s[64:71], s[8:9], 0x0 ; CHECK-NEXT: s_add_u32 s0, s0, s17 ; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9] ; CHECK-NEXT: s_addc_u32 s1, s1, 0 @@ -63,7 +63,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b32 s14, s33 ; CHECK-NEXT: s_mov_b32 s50, s15 ; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11] -; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] +; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] ; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v45, 0 ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -74,7 +74,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_u32 s16, s16, _Z12get_local_idj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z12get_local_idj@rel32@hi+12 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] ; CHECK-NEXT: s_mov_b32 s12, s51 @@ -88,7 +88,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] ; CHECK-NEXT: s_mov_b32 s12, s51 @@ -105,10 +105,10 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_and_b32_e32 v0, 0x7ffffffc, v0 ; CHECK-NEXT: v_and_b32_e32 v1, 28, v1 -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] -; CHECK-NEXT: global_load_dword v0, v0, s[100:101] +; CHECK-NEXT: global_load_dword v0, v0, s[68:69] ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 @@ -144,46 +144,46 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v43, 10, v43 ; CHECK-NEXT: v_add_nc_u32_e32 v46, 0x3c05, v0 ; CHECK-NEXT: v_mov_b32_e32 v47, 0 -; CHECK-NEXT: s_mov_b32 s63, 0 +; CHECK-NEXT: s_mov_b32 s55, 0 ; CHECK-NEXT: .LBB0_5: ; =>This Loop Header: Depth=1 ; CHECK-NEXT: ; Child Loop BB0_8 Depth 2 ; CHECK-NEXT: ; Child Loop BB0_20 Depth 2 -; CHECK-NEXT: v_add_nc_u32_e32 v0, s63, v44 -; CHECK-NEXT: s_lshl_b32 s4, s63, 5 -; CHECK-NEXT: s_add_i32 s62, s63, 1 -; CHECK-NEXT: s_add_i32 s5, s63, 5 -; CHECK-NEXT: v_or3_b32 v57, s4, v43, s62 +; CHECK-NEXT: v_add_nc_u32_e32 v0, s55, v44 +; CHECK-NEXT: s_lshl_b32 s4, s55, 5 +; CHECK-NEXT: s_add_i32 s54, s55, 1 +; CHECK-NEXT: s_add_i32 s5, s55, 5 +; CHECK-NEXT: v_or3_b32 v57, s4, v43, s54 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: ds_read_u8 v56, v0 -; CHECK-NEXT: v_mov_b32_e32 v58, s62 -; CHECK-NEXT: s_mov_b32 s64, exec_lo +; CHECK-NEXT: v_mov_b32_e32 v58, s54 +; CHECK-NEXT: s_mov_b32 s68, exec_lo ; CHECK-NEXT: v_cmpx_lt_u32_e64 s5, v42 ; CHECK-NEXT: s_cbranch_execz .LBB0_17 ; CHECK-NEXT: ; %bb.6: ; %.preheader2 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: s_mov_b32 s65, 0 -; CHECK-NEXT: s_mov_b32 s66, 0 +; CHECK-NEXT: s_mov_b32 s69, 0 +; CHECK-NEXT: s_mov_b32 s80, 0 ; CHECK-NEXT: s_branch .LBB0_8 ; CHECK-NEXT: .LBB0_7: ; in Loop: Header=BB0_8 Depth=2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67 -; CHECK-NEXT: s_add_i32 s66, s66, 4 -; CHECK-NEXT: s_add_i32 s4, s63, s66 -; CHECK-NEXT: v_add_nc_u32_e32 v0, s66, v57 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s81 +; CHECK-NEXT: s_add_i32 s80, s80, 4 +; CHECK-NEXT: s_add_i32 s4, s55, s80 +; CHECK-NEXT: v_add_nc_u32_e32 v0, s80, v57 ; CHECK-NEXT: s_add_i32 s5, s4, 5 ; CHECK-NEXT: s_add_i32 s4, s4, 1 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s5, v42 ; CHECK-NEXT: v_mov_b32_e32 v58, s4 -; CHECK-NEXT: s_or_b32 s65, vcc_lo, s65 -; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s65 +; CHECK-NEXT: s_or_b32 s69, vcc_lo, s69 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s69 ; CHECK-NEXT: s_cbranch_execz .LBB0_16 ; CHECK-NEXT: .LBB0_8: ; Parent Loop BB0_5 Depth=1 ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 -; CHECK-NEXT: v_add_nc_u32_e32 v59, s66, v46 -; CHECK-NEXT: v_add_nc_u32_e32 v58, s66, v57 +; CHECK-NEXT: v_add_nc_u32_e32 v59, s80, v46 +; CHECK-NEXT: v_add_nc_u32_e32 v58, s80, v57 ; CHECK-NEXT: ds_read_u8 v0, v59 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD -; CHECK-NEXT: s_and_saveexec_b32 s67, s4 +; CHECK-NEXT: s_and_saveexec_b32 s81, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_10 ; CHECK-NEXT: ; %bb.9: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -194,7 +194,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 @@ -204,11 +204,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v58 ; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_8 Depth=2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s81 ; CHECK-NEXT: ds_read_u8 v0, v59 offset:1 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD -; CHECK-NEXT: s_and_saveexec_b32 s67, s4 +; CHECK-NEXT: s_and_saveexec_b32 s81, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_12 ; CHECK-NEXT: ; %bb.11: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -219,7 +219,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 @@ -230,11 +230,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v60 ; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_8 Depth=2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s81 ; CHECK-NEXT: ds_read_u8 v0, v59 offset:2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD -; CHECK-NEXT: s_and_saveexec_b32 s67, s4 +; CHECK-NEXT: s_and_saveexec_b32 s81, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_14 ; CHECK-NEXT: ; %bb.13: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -245,7 +245,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 @@ -256,11 +256,11 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; CHECK-NEXT: ds_write_b32 v0, v60 ; CHECK-NEXT: .LBB0_14: ; in Loop: Header=BB0_8 Depth=2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s67 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s81 ; CHECK-NEXT: ds_read_u8 v0, v59 offset:3 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD -; CHECK-NEXT: s_and_saveexec_b32 s67, s4 +; CHECK-NEXT: s_and_saveexec_b32 s81, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_7 ; CHECK-NEXT: ; %bb.15: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -271,7 +271,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 @@ -284,27 +284,27 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_branch .LBB0_7 ; CHECK-NEXT: .LBB0_16: ; %Flow45 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s65 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s69 ; CHECK-NEXT: v_mov_b32_e32 v57, v0 ; CHECK-NEXT: .LBB0_17: ; %Flow46 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s64 -; CHECK-NEXT: s_mov_b32 s63, exec_lo +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s68 +; CHECK-NEXT: s_mov_b32 s55, exec_lo ; CHECK-NEXT: v_cmpx_lt_u32_e64 v58, v42 ; CHECK-NEXT: s_cbranch_execz .LBB0_23 ; CHECK-NEXT: ; %bb.18: ; %.preheader ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: s_mov_b32 s64, 0 +; CHECK-NEXT: s_mov_b32 s68, 0 ; CHECK-NEXT: s_inst_prefetch 0x1 ; CHECK-NEXT: s_branch .LBB0_20 ; CHECK-NEXT: .p2align 6 ; CHECK-NEXT: .LBB0_19: ; in Loop: Header=BB0_20 Depth=2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s65 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s69 ; CHECK-NEXT: v_add_nc_u32_e32 v58, 1, v58 ; CHECK-NEXT: v_add_nc_u32_e32 v57, 1, v57 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v58, v42 -; CHECK-NEXT: s_or_b32 s64, vcc_lo, s64 -; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s64 +; CHECK-NEXT: s_or_b32 s68, vcc_lo, s68 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s68 ; CHECK-NEXT: s_cbranch_execz .LBB0_22 ; CHECK-NEXT: .LBB0_20: ; Parent Loop BB0_5 Depth=1 ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 @@ -312,7 +312,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: ds_read_u8 v0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD -; CHECK-NEXT: s_and_saveexec_b32 s65, s4 +; CHECK-NEXT: s_and_saveexec_b32 s69, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_19 ; CHECK-NEXT: ; %bb.21: ; in Loop: Header=BB0_20 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -323,7 +323,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 @@ -336,15 +336,15 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: .LBB0_22: ; %Flow43 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 ; CHECK-NEXT: s_inst_prefetch 0x2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s64 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s68 ; CHECK-NEXT: .LBB0_23: ; %Flow44 ; CHECK-NEXT: ; in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s63 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55 ; CHECK-NEXT: ; %bb.24: ; in Loop: Header=BB0_5 Depth=1 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s62, v45 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s54, v45 ; CHECK-NEXT: v_cmp_lt_u32_e64 s4, 59, v47 ; CHECK-NEXT: v_add_nc_u32_e32 v46, 1, v46 -; CHECK-NEXT: s_mov_b32 s63, s62 +; CHECK-NEXT: s_mov_b32 s55, s54 ; CHECK-NEXT: s_or_b32 s4, vcc_lo, s4 ; CHECK-NEXT: s_and_b32 s4, exec_lo, s4 ; CHECK-NEXT: s_or_b32 s53, s4, s53 @@ -360,7 +360,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 @@ -385,7 +385,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_u32 s16, s16, _Z14get_local_sizej@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z14get_local_sizej@rel32@hi+12 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 @@ -407,8 +407,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_mul_u32_u24_e32 v1, 0x180, v63 ; CHECK-NEXT: v_lshlrev_b32_e32 v0, 5, v62 ; CHECK-NEXT: v_lshlrev_b32_e32 v4, 5, v72 -; CHECK-NEXT: v_add_co_u32 v2, s4, s96, v1 -; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s97, 0, s4 +; CHECK-NEXT: v_add_co_u32 v2, s4, s64, v1 +; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, s65, 0, s4 ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v2, v0 ; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo ; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4 @@ -443,7 +443,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_addPU3AS1Vjj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_addPU3AS1Vjj@rel32@hi+12 ; CHECK-NEXT: v_or3_b32 v2, v3, v2, v4 -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 @@ -454,8 +454,8 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: v_and_b32_e32 v0, 0x7fffc, v0 ; CHECK-NEXT: v_lshlrev_b32_e64 v44, v1, 1 ; CHECK-NEXT: v_and_b32_e32 v74, 28, v1 -; CHECK-NEXT: v_add_co_u32 v42, s4, s102, v0 -; CHECK-NEXT: v_add_co_ci_u32_e64 v43, null, s103, 0, s4 +; CHECK-NEXT: v_add_co_u32 v42, s4, s70, v0 +; CHECK-NEXT: v_add_co_ci_u32_e64 v43, null, s71, 0, s4 ; CHECK-NEXT: v_mov_b32_e32 v2, v44 ; CHECK-NEXT: v_mov_b32_e32 v0, v42 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] @@ -469,7 +469,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: ; %bb.30: ; in Loop: Header=BB0_28 Depth=1 ; CHECK-NEXT: v_xor_b32_e32 v4, v60, v58 ; CHECK-NEXT: v_lshrrev_b64 v[2:3], 16, v[56:57] -; CHECK-NEXT: v_mad_u64_u32 v[6:7], null, 0x180, v73, s[98:99] +; CHECK-NEXT: v_mad_u64_u32 v[6:7], null, 0x180, v73, s[66:67] ; CHECK-NEXT: v_lshlrev_b32_e32 v10, 5, v0 ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 16, v4 ; CHECK-NEXT: v_lshlrev_b32_e32 v8, 6, v72 @@ -504,7 +504,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_subPU3AS1Vjj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_subPU3AS1Vjj@rel32@hi+12 ; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] -; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] ; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37] ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 @@ -792,16 +792,16 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: s_addc_u32 s13, s13, 0 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12 ; CHECK-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 -; CHECK-NEXT: s_load_dwordx2 s[62:63], s[8:9], 0x10 +; CHECK-NEXT: s_load_dwordx2 s[54:55], s[8:9], 0x10 ; CHECK-NEXT: s_add_u32 s0, s0, s17 -; CHECK-NEXT: s_mov_b64 s[46:47], s[8:9] +; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9] ; CHECK-NEXT: s_addc_u32 s1, s1, 0 ; CHECK-NEXT: v_mov_b32_e32 v40, v0 -; CHECK-NEXT: s_add_u32 s52, s46, 40 +; CHECK-NEXT: s_add_u32 s52, s38, 40 ; CHECK-NEXT: v_mov_b32_e32 v31, v0 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: s_mov_b32 s33, s16 -; CHECK-NEXT: s_addc_u32 s53, s47, 0 +; CHECK-NEXT: s_addc_u32 s53, s39, 0 ; CHECK-NEXT: s_mov_b32 s51, s14 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z13get_global_idj@rel32@lo+4 @@ -858,7 +858,7 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: s_mov_b64 s[6:7], s[36:37] ; CHECK-NEXT: s_mov_b64 s[8:9], s[52:53] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: global_load_dword v0, v0, s[62:63] +; CHECK-NEXT: global_load_dword v0, v0, s[54:55] ; CHECK-NEXT: s_mov_b32 s12, s51 ; CHECK-NEXT: s_mov_b32 s13, s50 ; CHECK-NEXT: s_mov_b32 s14, s33 @@ -912,12 +912,12 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: .LBB1_5: ; %Flow4 ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; CHECK-NEXT: s_mov_b32 s62, exec_lo +; CHECK-NEXT: s_mov_b32 s54, exec_lo ; CHECK-NEXT: v_cmpx_lt_u32_e64 v56, v41 ; CHECK-NEXT: s_cbranch_execz .LBB1_11 ; CHECK-NEXT: ; %bb.6: ; %.103.preheader ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: s_mov_b32 s63, 0 +; CHECK-NEXT: s_mov_b32 s55, 0 ; CHECK-NEXT: s_inst_prefetch 0x1 ; CHECK-NEXT: s_branch .LBB1_8 ; CHECK-NEXT: .p2align 6 @@ -927,8 +927,8 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: v_add_nc_u32_e32 v56, 1, v56 ; CHECK-NEXT: v_add_nc_u32_e32 v47, 1, v47 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, v56, v41 -; CHECK-NEXT: s_or_b32 s63, vcc_lo, s63 -; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s63 +; CHECK-NEXT: s_or_b32 s55, vcc_lo, s55 +; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s55 ; CHECK-NEXT: s_cbranch_execz .LBB1_10 ; CHECK-NEXT: .LBB1_8: ; %.103 ; CHECK-NEXT: ; Parent Loop BB1_1 Depth=1 @@ -943,8 +943,8 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: ; in Loop: Header=BB1_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 v0, 0x3c00 -; CHECK-NEXT: s_add_u32 s8, s46, 40 -; CHECK-NEXT: s_addc_u32 s9, s47, 0 +; CHECK-NEXT: s_add_u32 s8, s38, 40 +; CHECK-NEXT: s_addc_u32 s9, s39, 0 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z10atomic_incPU3AS3Vj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z10atomic_incPU3AS3Vj@rel32@hi+12 @@ -962,10 +962,10 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: .LBB1_10: ; %Flow ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: s_inst_prefetch 0x2 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s63 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55 ; CHECK-NEXT: .LBB1_11: ; %Flow2 ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s62 +; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s54 ; CHECK-NEXT: ; %bb.12: ; %.32 ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc_lo, s53, v45 @@ -980,8 +980,8 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s52 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: v_mov_b32_e32 v0, 1 -; CHECK-NEXT: s_add_u32 s8, s46, 40 -; CHECK-NEXT: s_addc_u32 s9, s47, 0 +; CHECK-NEXT: s_add_u32 s8, s38, 40 +; CHECK-NEXT: s_addc_u32 s9, s39, 0 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _Z7barrierj@rel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _Z7barrierj@rel32@hi+12 diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll index 23b7369a11dd3..e8dacc93a8f3c 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll @@ -44,17 +44,19 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX7-NEXT: v_writelane_b32 v23, s35, 4 ; GFX7-NEXT: v_writelane_b32 v23, s36, 5 ; GFX7-NEXT: v_writelane_b32 v23, s37, 6 -; GFX7-NEXT: v_writelane_b32 v23, s46, 7 -; GFX7-NEXT: v_writelane_b32 v23, s47, 8 +; GFX7-NEXT: v_writelane_b32 v23, s38, 7 +; GFX7-NEXT: v_writelane_b32 v23, s39, 8 ; GFX7-NEXT: v_writelane_b32 v23, s48, 9 ; GFX7-NEXT: v_writelane_b32 v23, s49, 10 ; GFX7-NEXT: v_writelane_b32 v23, s50, 11 ; GFX7-NEXT: v_writelane_b32 v23, s51, 12 -; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 ; GFX7-NEXT: v_writelane_b32 v23, s52, 13 +; GFX7-NEXT: v_writelane_b32 v23, s53, 14 +; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 +; GFX7-NEXT: v_writelane_b32 v23, s54, 15 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec -; GFX7-NEXT: v_writelane_b32 v23, s53, 14 +; GFX7-NEXT: v_writelane_b32 v23, s55, 16 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use alloca0 v0 ; GFX7-NEXT: ;;#ASMEND @@ -71,14 +73,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX7-NEXT: ;;#ASMEND +; GFX7-NEXT: v_readlane_b32 s55, v23, 16 +; GFX7-NEXT: v_readlane_b32 s54, v23, 15 ; GFX7-NEXT: v_readlane_b32 s53, v23, 14 ; GFX7-NEXT: v_readlane_b32 s52, v23, 13 ; GFX7-NEXT: v_readlane_b32 s51, v23, 12 ; GFX7-NEXT: v_readlane_b32 s50, v23, 11 ; GFX7-NEXT: v_readlane_b32 s49, v23, 10 ; GFX7-NEXT: v_readlane_b32 s48, v23, 9 -; GFX7-NEXT: v_readlane_b32 s47, v23, 8 -; GFX7-NEXT: v_readlane_b32 s46, v23, 7 +; GFX7-NEXT: v_readlane_b32 s39, v23, 8 +; GFX7-NEXT: v_readlane_b32 s38, v23, 7 ; GFX7-NEXT: v_readlane_b32 s37, v23, 6 ; GFX7-NEXT: v_readlane_b32 s36, v23, 5 ; GFX7-NEXT: v_readlane_b32 s35, v23, 4 @@ -107,17 +111,19 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX8-NEXT: v_writelane_b32 v23, s35, 4 ; GFX8-NEXT: v_writelane_b32 v23, s36, 5 ; GFX8-NEXT: v_writelane_b32 v23, s37, 6 -; GFX8-NEXT: v_writelane_b32 v23, s46, 7 -; GFX8-NEXT: v_writelane_b32 v23, s47, 8 +; GFX8-NEXT: v_writelane_b32 v23, s38, 7 +; GFX8-NEXT: v_writelane_b32 v23, s39, 8 ; GFX8-NEXT: v_writelane_b32 v23, s48, 9 ; GFX8-NEXT: v_writelane_b32 v23, s49, 10 ; GFX8-NEXT: v_writelane_b32 v23, s50, 11 ; GFX8-NEXT: v_writelane_b32 v23, s51, 12 -; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_writelane_b32 v23, s52, 13 +; GFX8-NEXT: v_writelane_b32 v23, s53, 14 +; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX8-NEXT: v_writelane_b32 v23, s54, 15 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_writelane_b32 v23, s53, 14 +; GFX8-NEXT: v_writelane_b32 v23, s55, 16 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND @@ -134,14 +140,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s55, v23, 16 +; GFX8-NEXT: v_readlane_b32 s54, v23, 15 ; GFX8-NEXT: v_readlane_b32 s53, v23, 14 ; GFX8-NEXT: v_readlane_b32 s52, v23, 13 ; GFX8-NEXT: v_readlane_b32 s51, v23, 12 ; GFX8-NEXT: v_readlane_b32 s50, v23, 11 ; GFX8-NEXT: v_readlane_b32 s49, v23, 10 ; GFX8-NEXT: v_readlane_b32 s48, v23, 9 -; GFX8-NEXT: v_readlane_b32 s47, v23, 8 -; GFX8-NEXT: v_readlane_b32 s46, v23, 7 +; GFX8-NEXT: v_readlane_b32 s39, v23, 8 +; GFX8-NEXT: v_readlane_b32 s38, v23, 7 ; GFX8-NEXT: v_readlane_b32 s37, v23, 6 ; GFX8-NEXT: v_readlane_b32 s36, v23, 5 ; GFX8-NEXT: v_readlane_b32 s35, v23, 4 @@ -170,17 +178,19 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX900-NEXT: v_writelane_b32 v23, s35, 4 ; GFX900-NEXT: v_writelane_b32 v23, s36, 5 ; GFX900-NEXT: v_writelane_b32 v23, s37, 6 -; GFX900-NEXT: v_writelane_b32 v23, s46, 7 -; GFX900-NEXT: v_writelane_b32 v23, s47, 8 +; GFX900-NEXT: v_writelane_b32 v23, s38, 7 +; GFX900-NEXT: v_writelane_b32 v23, s39, 8 ; GFX900-NEXT: v_writelane_b32 v23, s48, 9 ; GFX900-NEXT: v_writelane_b32 v23, s49, 10 ; GFX900-NEXT: v_writelane_b32 v23, s50, 11 ; GFX900-NEXT: v_writelane_b32 v23, s51, 12 -; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_writelane_b32 v23, s52, 13 +; GFX900-NEXT: v_writelane_b32 v23, s53, 14 +; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX900-NEXT: v_writelane_b32 v23, s54, 15 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_writelane_b32 v23, s53, 14 +; GFX900-NEXT: v_writelane_b32 v23, s55, 16 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND @@ -196,14 +206,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s55, v23, 16 +; GFX900-NEXT: v_readlane_b32 s54, v23, 15 ; GFX900-NEXT: v_readlane_b32 s53, v23, 14 ; GFX900-NEXT: v_readlane_b32 s52, v23, 13 ; GFX900-NEXT: v_readlane_b32 s51, v23, 12 ; GFX900-NEXT: v_readlane_b32 s50, v23, 11 ; GFX900-NEXT: v_readlane_b32 s49, v23, 10 ; GFX900-NEXT: v_readlane_b32 s48, v23, 9 -; GFX900-NEXT: v_readlane_b32 s47, v23, 8 -; GFX900-NEXT: v_readlane_b32 s46, v23, 7 +; GFX900-NEXT: v_readlane_b32 s39, v23, 8 +; GFX900-NEXT: v_readlane_b32 s38, v23, 7 ; GFX900-NEXT: v_readlane_b32 s37, v23, 6 ; GFX900-NEXT: v_readlane_b32 s36, v23, 5 ; GFX900-NEXT: v_readlane_b32 s35, v23, 4 @@ -232,17 +244,19 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX942-NEXT: v_writelane_b32 v23, s35, 4 ; GFX942-NEXT: v_writelane_b32 v23, s36, 5 ; GFX942-NEXT: v_writelane_b32 v23, s37, 6 -; GFX942-NEXT: v_writelane_b32 v23, s46, 7 -; GFX942-NEXT: v_writelane_b32 v23, s47, 8 +; GFX942-NEXT: v_writelane_b32 v23, s38, 7 +; GFX942-NEXT: v_writelane_b32 v23, s39, 8 ; GFX942-NEXT: v_writelane_b32 v23, s48, 9 ; GFX942-NEXT: v_writelane_b32 v23, s49, 10 ; GFX942-NEXT: v_writelane_b32 v23, s50, 11 ; GFX942-NEXT: v_writelane_b32 v23, s51, 12 -; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_writelane_b32 v23, s52, 13 +; GFX942-NEXT: v_writelane_b32 v23, s53, 14 +; GFX942-NEXT: s_add_i32 s0, s32, 64 +; GFX942-NEXT: v_writelane_b32 v23, s54, 15 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec -; GFX942-NEXT: v_writelane_b32 v23, s53, 14 +; GFX942-NEXT: v_writelane_b32 v23, s55, 16 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND @@ -256,14 +270,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s55, v23, 16 +; GFX942-NEXT: v_readlane_b32 s54, v23, 15 ; GFX942-NEXT: v_readlane_b32 s53, v23, 14 ; GFX942-NEXT: v_readlane_b32 s52, v23, 13 ; GFX942-NEXT: v_readlane_b32 s51, v23, 12 ; GFX942-NEXT: v_readlane_b32 s50, v23, 11 ; GFX942-NEXT: v_readlane_b32 s49, v23, 10 ; GFX942-NEXT: v_readlane_b32 s48, v23, 9 -; GFX942-NEXT: v_readlane_b32 s47, v23, 8 -; GFX942-NEXT: v_readlane_b32 s46, v23, 7 +; GFX942-NEXT: v_readlane_b32 s39, v23, 8 +; GFX942-NEXT: v_readlane_b32 s38, v23, 7 ; GFX942-NEXT: v_readlane_b32 s37, v23, 6 ; GFX942-NEXT: v_readlane_b32 s36, v23, 5 ; GFX942-NEXT: v_readlane_b32 s35, v23, 4 @@ -299,14 +315,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_1-NEXT: v_writelane_b32 v23, s35, 4 ; GFX10_1-NEXT: v_writelane_b32 v23, s36, 5 ; GFX10_1-NEXT: v_writelane_b32 v23, s37, 6 -; GFX10_1-NEXT: v_writelane_b32 v23, s46, 7 -; GFX10_1-NEXT: v_writelane_b32 v23, s47, 8 +; GFX10_1-NEXT: v_writelane_b32 v23, s38, 7 +; GFX10_1-NEXT: v_writelane_b32 v23, s39, 8 ; GFX10_1-NEXT: v_writelane_b32 v23, s48, 9 ; GFX10_1-NEXT: v_writelane_b32 v23, s49, 10 ; GFX10_1-NEXT: v_writelane_b32 v23, s50, 11 ; GFX10_1-NEXT: v_writelane_b32 v23, s51, 12 ; GFX10_1-NEXT: v_writelane_b32 v23, s52, 13 ; GFX10_1-NEXT: v_writelane_b32 v23, s53, 14 +; GFX10_1-NEXT: v_writelane_b32 v23, s54, 15 +; GFX10_1-NEXT: v_writelane_b32 v23, s55, 16 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX10_1-NEXT: ;;#ASMEND @@ -316,14 +334,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s55, v23, 16 +; GFX10_1-NEXT: v_readlane_b32 s54, v23, 15 ; GFX10_1-NEXT: v_readlane_b32 s53, v23, 14 ; GFX10_1-NEXT: v_readlane_b32 s52, v23, 13 ; GFX10_1-NEXT: v_readlane_b32 s51, v23, 12 ; GFX10_1-NEXT: v_readlane_b32 s50, v23, 11 ; GFX10_1-NEXT: v_readlane_b32 s49, v23, 10 ; GFX10_1-NEXT: v_readlane_b32 s48, v23, 9 -; GFX10_1-NEXT: v_readlane_b32 s47, v23, 8 -; GFX10_1-NEXT: v_readlane_b32 s46, v23, 7 +; GFX10_1-NEXT: v_readlane_b32 s39, v23, 8 +; GFX10_1-NEXT: v_readlane_b32 s38, v23, 7 ; GFX10_1-NEXT: v_readlane_b32 s37, v23, 6 ; GFX10_1-NEXT: v_readlane_b32 s36, v23, 5 ; GFX10_1-NEXT: v_readlane_b32 s35, v23, 4 @@ -359,14 +379,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_3-NEXT: v_writelane_b32 v23, s35, 4 ; GFX10_3-NEXT: v_writelane_b32 v23, s36, 5 ; GFX10_3-NEXT: v_writelane_b32 v23, s37, 6 -; GFX10_3-NEXT: v_writelane_b32 v23, s46, 7 -; GFX10_3-NEXT: v_writelane_b32 v23, s47, 8 +; GFX10_3-NEXT: v_writelane_b32 v23, s38, 7 +; GFX10_3-NEXT: v_writelane_b32 v23, s39, 8 ; GFX10_3-NEXT: v_writelane_b32 v23, s48, 9 ; GFX10_3-NEXT: v_writelane_b32 v23, s49, 10 ; GFX10_3-NEXT: v_writelane_b32 v23, s50, 11 ; GFX10_3-NEXT: v_writelane_b32 v23, s51, 12 ; GFX10_3-NEXT: v_writelane_b32 v23, s52, 13 ; GFX10_3-NEXT: v_writelane_b32 v23, s53, 14 +; GFX10_3-NEXT: v_writelane_b32 v23, s54, 15 +; GFX10_3-NEXT: v_writelane_b32 v23, s55, 16 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX10_3-NEXT: ;;#ASMEND @@ -376,14 +398,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s55, v23, 16 +; GFX10_3-NEXT: v_readlane_b32 s54, v23, 15 ; GFX10_3-NEXT: v_readlane_b32 s53, v23, 14 ; GFX10_3-NEXT: v_readlane_b32 s52, v23, 13 ; GFX10_3-NEXT: v_readlane_b32 s51, v23, 12 ; GFX10_3-NEXT: v_readlane_b32 s50, v23, 11 ; GFX10_3-NEXT: v_readlane_b32 s49, v23, 10 ; GFX10_3-NEXT: v_readlane_b32 s48, v23, 9 -; GFX10_3-NEXT: v_readlane_b32 s47, v23, 8 -; GFX10_3-NEXT: v_readlane_b32 s46, v23, 7 +; GFX10_3-NEXT: v_readlane_b32 s39, v23, 8 +; GFX10_3-NEXT: v_readlane_b32 s38, v23, 7 ; GFX10_3-NEXT: v_readlane_b32 s37, v23, 6 ; GFX10_3-NEXT: v_readlane_b32 s36, v23, 5 ; GFX10_3-NEXT: v_readlane_b32 s35, v23, 4 @@ -419,14 +443,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX11-NEXT: v_writelane_b32 v23, s35, 4 ; GFX11-NEXT: v_writelane_b32 v23, s36, 5 ; GFX11-NEXT: v_writelane_b32 v23, s37, 6 -; GFX11-NEXT: v_writelane_b32 v23, s46, 7 -; GFX11-NEXT: v_writelane_b32 v23, s47, 8 +; GFX11-NEXT: v_writelane_b32 v23, s38, 7 +; GFX11-NEXT: v_writelane_b32 v23, s39, 8 ; GFX11-NEXT: v_writelane_b32 v23, s48, 9 ; GFX11-NEXT: v_writelane_b32 v23, s49, 10 ; GFX11-NEXT: v_writelane_b32 v23, s50, 11 ; GFX11-NEXT: v_writelane_b32 v23, s51, 12 ; GFX11-NEXT: v_writelane_b32 v23, s52, 13 ; GFX11-NEXT: v_writelane_b32 v23, s53, 14 +; GFX11-NEXT: v_writelane_b32 v23, s54, 15 +; GFX11-NEXT: v_writelane_b32 v23, s55, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX11-NEXT: ;;#ASMEND @@ -438,14 +464,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s55, v23, 16 +; GFX11-NEXT: v_readlane_b32 s54, v23, 15 ; GFX11-NEXT: v_readlane_b32 s53, v23, 14 ; GFX11-NEXT: v_readlane_b32 s52, v23, 13 ; GFX11-NEXT: v_readlane_b32 s51, v23, 12 ; GFX11-NEXT: v_readlane_b32 s50, v23, 11 ; GFX11-NEXT: v_readlane_b32 s49, v23, 10 ; GFX11-NEXT: v_readlane_b32 s48, v23, 9 -; GFX11-NEXT: v_readlane_b32 s47, v23, 8 -; GFX11-NEXT: v_readlane_b32 s46, v23, 7 +; GFX11-NEXT: v_readlane_b32 s39, v23, 8 +; GFX11-NEXT: v_readlane_b32 s38, v23, 7 ; GFX11-NEXT: v_readlane_b32 s37, v23, 6 ; GFX11-NEXT: v_readlane_b32 s36, v23, 5 ; GFX11-NEXT: v_readlane_b32 s35, v23, 4 @@ -483,14 +511,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX12-NEXT: v_writelane_b32 v23, s35, 4 ; GFX12-NEXT: v_writelane_b32 v23, s36, 5 ; GFX12-NEXT: v_writelane_b32 v23, s37, 6 -; GFX12-NEXT: v_writelane_b32 v23, s46, 7 -; GFX12-NEXT: v_writelane_b32 v23, s47, 8 +; GFX12-NEXT: v_writelane_b32 v23, s38, 7 +; GFX12-NEXT: v_writelane_b32 v23, s39, 8 ; GFX12-NEXT: v_writelane_b32 v23, s48, 9 ; GFX12-NEXT: v_writelane_b32 v23, s49, 10 ; GFX12-NEXT: v_writelane_b32 v23, s50, 11 ; GFX12-NEXT: v_writelane_b32 v23, s51, 12 ; GFX12-NEXT: v_writelane_b32 v23, s52, 13 ; GFX12-NEXT: v_writelane_b32 v23, s53, 14 +; GFX12-NEXT: v_writelane_b32 v23, s54, 15 +; GFX12-NEXT: v_writelane_b32 v23, s55, 16 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX12-NEXT: ;;#ASMEND @@ -503,14 +533,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s55, v23, 16 +; GFX12-NEXT: v_readlane_b32 s54, v23, 15 ; GFX12-NEXT: v_readlane_b32 s53, v23, 14 ; GFX12-NEXT: v_readlane_b32 s52, v23, 13 ; GFX12-NEXT: v_readlane_b32 s51, v23, 12 ; GFX12-NEXT: v_readlane_b32 s50, v23, 11 ; GFX12-NEXT: v_readlane_b32 s49, v23, 10 ; GFX12-NEXT: v_readlane_b32 s48, v23, 9 -; GFX12-NEXT: v_readlane_b32 s47, v23, 8 -; GFX12-NEXT: v_readlane_b32 s46, v23, 7 +; GFX12-NEXT: v_readlane_b32 s39, v23, 8 +; GFX12-NEXT: v_readlane_b32 s38, v23, 7 ; GFX12-NEXT: v_readlane_b32 s37, v23, 6 ; GFX12-NEXT: v_readlane_b32 s36, v23, 5 ; GFX12-NEXT: v_readlane_b32 s35, v23, 4 @@ -581,15 +613,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX7-NEXT: v_writelane_b32 v21, s35, 4 ; GFX7-NEXT: v_writelane_b32 v21, s36, 5 ; GFX7-NEXT: v_writelane_b32 v21, s37, 6 -; GFX7-NEXT: v_writelane_b32 v21, s46, 7 -; GFX7-NEXT: v_writelane_b32 v21, s47, 8 +; GFX7-NEXT: v_writelane_b32 v21, s38, 7 +; GFX7-NEXT: v_writelane_b32 v21, s39, 8 ; GFX7-NEXT: v_writelane_b32 v21, s48, 9 ; GFX7-NEXT: v_writelane_b32 v21, s49, 10 ; GFX7-NEXT: v_writelane_b32 v21, s50, 11 ; GFX7-NEXT: v_writelane_b32 v21, s51, 12 ; GFX7-NEXT: v_writelane_b32 v21, s52, 13 -; GFX7-NEXT: s_and_b64 s[4:5], 0, exec ; GFX7-NEXT: v_writelane_b32 v21, s53, 14 +; GFX7-NEXT: v_writelane_b32 v21, s54, 15 +; GFX7-NEXT: s_and_b64 s[4:5], 0, exec +; GFX7-NEXT: v_writelane_b32 v21, s55, 16 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX7-NEXT: ;;#ASMEND @@ -599,14 +633,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX7-NEXT: ;;#ASMEND +; GFX7-NEXT: v_readlane_b32 s55, v21, 16 +; GFX7-NEXT: v_readlane_b32 s54, v21, 15 ; GFX7-NEXT: v_readlane_b32 s53, v21, 14 ; GFX7-NEXT: v_readlane_b32 s52, v21, 13 ; GFX7-NEXT: v_readlane_b32 s51, v21, 12 ; GFX7-NEXT: v_readlane_b32 s50, v21, 11 ; GFX7-NEXT: v_readlane_b32 s49, v21, 10 ; GFX7-NEXT: v_readlane_b32 s48, v21, 9 -; GFX7-NEXT: v_readlane_b32 s47, v21, 8 -; GFX7-NEXT: v_readlane_b32 s46, v21, 7 +; GFX7-NEXT: v_readlane_b32 s39, v21, 8 +; GFX7-NEXT: v_readlane_b32 s38, v21, 7 ; GFX7-NEXT: v_readlane_b32 s37, v21, 6 ; GFX7-NEXT: v_readlane_b32 s36, v21, 5 ; GFX7-NEXT: v_readlane_b32 s35, v21, 4 @@ -635,15 +671,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX8-NEXT: v_writelane_b32 v21, s35, 4 ; GFX8-NEXT: v_writelane_b32 v21, s36, 5 ; GFX8-NEXT: v_writelane_b32 v21, s37, 6 -; GFX8-NEXT: v_writelane_b32 v21, s46, 7 -; GFX8-NEXT: v_writelane_b32 v21, s47, 8 +; GFX8-NEXT: v_writelane_b32 v21, s38, 7 +; GFX8-NEXT: v_writelane_b32 v21, s39, 8 ; GFX8-NEXT: v_writelane_b32 v21, s48, 9 ; GFX8-NEXT: v_writelane_b32 v21, s49, 10 ; GFX8-NEXT: v_writelane_b32 v21, s50, 11 ; GFX8-NEXT: v_writelane_b32 v21, s51, 12 ; GFX8-NEXT: v_writelane_b32 v21, s52, 13 -; GFX8-NEXT: s_and_b64 s[4:5], 0, exec ; GFX8-NEXT: v_writelane_b32 v21, s53, 14 +; GFX8-NEXT: v_writelane_b32 v21, s54, 15 +; GFX8-NEXT: s_and_b64 s[4:5], 0, exec +; GFX8-NEXT: v_writelane_b32 v21, s55, 16 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX8-NEXT: ;;#ASMEND @@ -653,14 +691,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s55, v21, 16 +; GFX8-NEXT: v_readlane_b32 s54, v21, 15 ; GFX8-NEXT: v_readlane_b32 s53, v21, 14 ; GFX8-NEXT: v_readlane_b32 s52, v21, 13 ; GFX8-NEXT: v_readlane_b32 s51, v21, 12 ; GFX8-NEXT: v_readlane_b32 s50, v21, 11 ; GFX8-NEXT: v_readlane_b32 s49, v21, 10 ; GFX8-NEXT: v_readlane_b32 s48, v21, 9 -; GFX8-NEXT: v_readlane_b32 s47, v21, 8 -; GFX8-NEXT: v_readlane_b32 s46, v21, 7 +; GFX8-NEXT: v_readlane_b32 s39, v21, 8 +; GFX8-NEXT: v_readlane_b32 s38, v21, 7 ; GFX8-NEXT: v_readlane_b32 s37, v21, 6 ; GFX8-NEXT: v_readlane_b32 s36, v21, 5 ; GFX8-NEXT: v_readlane_b32 s35, v21, 4 @@ -689,15 +729,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX900-NEXT: v_writelane_b32 v21, s35, 4 ; GFX900-NEXT: v_writelane_b32 v21, s36, 5 ; GFX900-NEXT: v_writelane_b32 v21, s37, 6 -; GFX900-NEXT: v_writelane_b32 v21, s46, 7 -; GFX900-NEXT: v_writelane_b32 v21, s47, 8 +; GFX900-NEXT: v_writelane_b32 v21, s38, 7 +; GFX900-NEXT: v_writelane_b32 v21, s39, 8 ; GFX900-NEXT: v_writelane_b32 v21, s48, 9 ; GFX900-NEXT: v_writelane_b32 v21, s49, 10 ; GFX900-NEXT: v_writelane_b32 v21, s50, 11 ; GFX900-NEXT: v_writelane_b32 v21, s51, 12 ; GFX900-NEXT: v_writelane_b32 v21, s52, 13 -; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: v_writelane_b32 v21, s53, 14 +; GFX900-NEXT: v_writelane_b32 v21, s54, 15 +; GFX900-NEXT: s_and_b64 s[4:5], 0, exec +; GFX900-NEXT: v_writelane_b32 v21, s55, 16 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX900-NEXT: ;;#ASMEND @@ -707,14 +749,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s55, v21, 16 +; GFX900-NEXT: v_readlane_b32 s54, v21, 15 ; GFX900-NEXT: v_readlane_b32 s53, v21, 14 ; GFX900-NEXT: v_readlane_b32 s52, v21, 13 ; GFX900-NEXT: v_readlane_b32 s51, v21, 12 ; GFX900-NEXT: v_readlane_b32 s50, v21, 11 ; GFX900-NEXT: v_readlane_b32 s49, v21, 10 ; GFX900-NEXT: v_readlane_b32 s48, v21, 9 -; GFX900-NEXT: v_readlane_b32 s47, v21, 8 -; GFX900-NEXT: v_readlane_b32 s46, v21, 7 +; GFX900-NEXT: v_readlane_b32 s39, v21, 8 +; GFX900-NEXT: v_readlane_b32 s38, v21, 7 ; GFX900-NEXT: v_readlane_b32 s37, v21, 6 ; GFX900-NEXT: v_readlane_b32 s36, v21, 5 ; GFX900-NEXT: v_readlane_b32 s35, v21, 4 @@ -743,15 +787,17 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX942-NEXT: v_writelane_b32 v21, s35, 4 ; GFX942-NEXT: v_writelane_b32 v21, s36, 5 ; GFX942-NEXT: v_writelane_b32 v21, s37, 6 -; GFX942-NEXT: v_writelane_b32 v21, s46, 7 -; GFX942-NEXT: v_writelane_b32 v21, s47, 8 +; GFX942-NEXT: v_writelane_b32 v21, s38, 7 +; GFX942-NEXT: v_writelane_b32 v21, s39, 8 ; GFX942-NEXT: v_writelane_b32 v21, s48, 9 ; GFX942-NEXT: v_writelane_b32 v21, s49, 10 ; GFX942-NEXT: v_writelane_b32 v21, s50, 11 ; GFX942-NEXT: v_writelane_b32 v21, s51, 12 ; GFX942-NEXT: v_writelane_b32 v21, s52, 13 -; GFX942-NEXT: s_and_b64 s[60:61], 0, exec ; GFX942-NEXT: v_writelane_b32 v21, s53, 14 +; GFX942-NEXT: v_writelane_b32 v21, s54, 15 +; GFX942-NEXT: s_and_b64 s[60:61], 0, exec +; GFX942-NEXT: v_writelane_b32 v21, s55, 16 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX942-NEXT: ;;#ASMEND @@ -762,14 +808,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s55, v21, 16 +; GFX942-NEXT: v_readlane_b32 s54, v21, 15 ; GFX942-NEXT: v_readlane_b32 s53, v21, 14 ; GFX942-NEXT: v_readlane_b32 s52, v21, 13 ; GFX942-NEXT: v_readlane_b32 s51, v21, 12 ; GFX942-NEXT: v_readlane_b32 s50, v21, 11 ; GFX942-NEXT: v_readlane_b32 s49, v21, 10 ; GFX942-NEXT: v_readlane_b32 s48, v21, 9 -; GFX942-NEXT: v_readlane_b32 s47, v21, 8 -; GFX942-NEXT: v_readlane_b32 s46, v21, 7 +; GFX942-NEXT: v_readlane_b32 s39, v21, 8 +; GFX942-NEXT: v_readlane_b32 s38, v21, 7 ; GFX942-NEXT: v_readlane_b32 s37, v21, 6 ; GFX942-NEXT: v_readlane_b32 s36, v21, 5 ; GFX942-NEXT: v_readlane_b32 s35, v21, 4 @@ -800,14 +848,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX10_1-NEXT: v_writelane_b32 v21, s35, 4 ; GFX10_1-NEXT: v_writelane_b32 v21, s36, 5 ; GFX10_1-NEXT: v_writelane_b32 v21, s37, 6 -; GFX10_1-NEXT: v_writelane_b32 v21, s46, 7 -; GFX10_1-NEXT: v_writelane_b32 v21, s47, 8 +; GFX10_1-NEXT: v_writelane_b32 v21, s38, 7 +; GFX10_1-NEXT: v_writelane_b32 v21, s39, 8 ; GFX10_1-NEXT: v_writelane_b32 v21, s48, 9 ; GFX10_1-NEXT: v_writelane_b32 v21, s49, 10 ; GFX10_1-NEXT: v_writelane_b32 v21, s50, 11 ; GFX10_1-NEXT: v_writelane_b32 v21, s51, 12 ; GFX10_1-NEXT: v_writelane_b32 v21, s52, 13 ; GFX10_1-NEXT: v_writelane_b32 v21, s53, 14 +; GFX10_1-NEXT: v_writelane_b32 v21, s54, 15 +; GFX10_1-NEXT: v_writelane_b32 v21, s55, 16 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX10_1-NEXT: ;;#ASMEND @@ -817,14 +867,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s55, v21, 16 +; GFX10_1-NEXT: v_readlane_b32 s54, v21, 15 ; GFX10_1-NEXT: v_readlane_b32 s53, v21, 14 ; GFX10_1-NEXT: v_readlane_b32 s52, v21, 13 ; GFX10_1-NEXT: v_readlane_b32 s51, v21, 12 ; GFX10_1-NEXT: v_readlane_b32 s50, v21, 11 ; GFX10_1-NEXT: v_readlane_b32 s49, v21, 10 ; GFX10_1-NEXT: v_readlane_b32 s48, v21, 9 -; GFX10_1-NEXT: v_readlane_b32 s47, v21, 8 -; GFX10_1-NEXT: v_readlane_b32 s46, v21, 7 +; GFX10_1-NEXT: v_readlane_b32 s39, v21, 8 +; GFX10_1-NEXT: v_readlane_b32 s38, v21, 7 ; GFX10_1-NEXT: v_readlane_b32 s37, v21, 6 ; GFX10_1-NEXT: v_readlane_b32 s36, v21, 5 ; GFX10_1-NEXT: v_readlane_b32 s35, v21, 4 @@ -855,14 +907,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX10_3-NEXT: v_writelane_b32 v21, s35, 4 ; GFX10_3-NEXT: v_writelane_b32 v21, s36, 5 ; GFX10_3-NEXT: v_writelane_b32 v21, s37, 6 -; GFX10_3-NEXT: v_writelane_b32 v21, s46, 7 -; GFX10_3-NEXT: v_writelane_b32 v21, s47, 8 +; GFX10_3-NEXT: v_writelane_b32 v21, s38, 7 +; GFX10_3-NEXT: v_writelane_b32 v21, s39, 8 ; GFX10_3-NEXT: v_writelane_b32 v21, s48, 9 ; GFX10_3-NEXT: v_writelane_b32 v21, s49, 10 ; GFX10_3-NEXT: v_writelane_b32 v21, s50, 11 ; GFX10_3-NEXT: v_writelane_b32 v21, s51, 12 ; GFX10_3-NEXT: v_writelane_b32 v21, s52, 13 ; GFX10_3-NEXT: v_writelane_b32 v21, s53, 14 +; GFX10_3-NEXT: v_writelane_b32 v21, s54, 15 +; GFX10_3-NEXT: v_writelane_b32 v21, s55, 16 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX10_3-NEXT: ;;#ASMEND @@ -872,14 +926,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s55, v21, 16 +; GFX10_3-NEXT: v_readlane_b32 s54, v21, 15 ; GFX10_3-NEXT: v_readlane_b32 s53, v21, 14 ; GFX10_3-NEXT: v_readlane_b32 s52, v21, 13 ; GFX10_3-NEXT: v_readlane_b32 s51, v21, 12 ; GFX10_3-NEXT: v_readlane_b32 s50, v21, 11 ; GFX10_3-NEXT: v_readlane_b32 s49, v21, 10 ; GFX10_3-NEXT: v_readlane_b32 s48, v21, 9 -; GFX10_3-NEXT: v_readlane_b32 s47, v21, 8 -; GFX10_3-NEXT: v_readlane_b32 s46, v21, 7 +; GFX10_3-NEXT: v_readlane_b32 s39, v21, 8 +; GFX10_3-NEXT: v_readlane_b32 s38, v21, 7 ; GFX10_3-NEXT: v_readlane_b32 s37, v21, 6 ; GFX10_3-NEXT: v_readlane_b32 s36, v21, 5 ; GFX10_3-NEXT: v_readlane_b32 s35, v21, 4 @@ -909,14 +965,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX11-NEXT: v_writelane_b32 v21, s35, 4 ; GFX11-NEXT: v_writelane_b32 v21, s36, 5 ; GFX11-NEXT: v_writelane_b32 v21, s37, 6 -; GFX11-NEXT: v_writelane_b32 v21, s46, 7 -; GFX11-NEXT: v_writelane_b32 v21, s47, 8 +; GFX11-NEXT: v_writelane_b32 v21, s38, 7 +; GFX11-NEXT: v_writelane_b32 v21, s39, 8 ; GFX11-NEXT: v_writelane_b32 v21, s48, 9 ; GFX11-NEXT: v_writelane_b32 v21, s49, 10 ; GFX11-NEXT: v_writelane_b32 v21, s50, 11 ; GFX11-NEXT: v_writelane_b32 v21, s51, 12 ; GFX11-NEXT: v_writelane_b32 v21, s52, 13 ; GFX11-NEXT: v_writelane_b32 v21, s53, 14 +; GFX11-NEXT: v_writelane_b32 v21, s54, 15 +; GFX11-NEXT: v_writelane_b32 v21, s55, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX11-NEXT: ;;#ASMEND @@ -928,14 +986,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s55, v21, 16 +; GFX11-NEXT: v_readlane_b32 s54, v21, 15 ; GFX11-NEXT: v_readlane_b32 s53, v21, 14 ; GFX11-NEXT: v_readlane_b32 s52, v21, 13 ; GFX11-NEXT: v_readlane_b32 s51, v21, 12 ; GFX11-NEXT: v_readlane_b32 s50, v21, 11 ; GFX11-NEXT: v_readlane_b32 s49, v21, 10 ; GFX11-NEXT: v_readlane_b32 s48, v21, 9 -; GFX11-NEXT: v_readlane_b32 s47, v21, 8 -; GFX11-NEXT: v_readlane_b32 s46, v21, 7 +; GFX11-NEXT: v_readlane_b32 s39, v21, 8 +; GFX11-NEXT: v_readlane_b32 s38, v21, 7 ; GFX11-NEXT: v_readlane_b32 s37, v21, 6 ; GFX11-NEXT: v_readlane_b32 s36, v21, 5 ; GFX11-NEXT: v_readlane_b32 s35, v21, 4 @@ -969,14 +1029,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX12-NEXT: v_writelane_b32 v21, s35, 4 ; GFX12-NEXT: v_writelane_b32 v21, s36, 5 ; GFX12-NEXT: v_writelane_b32 v21, s37, 6 -; GFX12-NEXT: v_writelane_b32 v21, s46, 7 -; GFX12-NEXT: v_writelane_b32 v21, s47, 8 +; GFX12-NEXT: v_writelane_b32 v21, s38, 7 +; GFX12-NEXT: v_writelane_b32 v21, s39, 8 ; GFX12-NEXT: v_writelane_b32 v21, s48, 9 ; GFX12-NEXT: v_writelane_b32 v21, s49, 10 ; GFX12-NEXT: v_writelane_b32 v21, s50, 11 ; GFX12-NEXT: v_writelane_b32 v21, s51, 12 ; GFX12-NEXT: v_writelane_b32 v21, s52, 13 ; GFX12-NEXT: v_writelane_b32 v21, s53, 14 +; GFX12-NEXT: v_writelane_b32 v21, s54, 15 +; GFX12-NEXT: v_writelane_b32 v21, s55, 16 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX12-NEXT: ;;#ASMEND @@ -985,14 +1047,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_readlane_b32 s55, v21, 16 +; GFX12-NEXT: v_readlane_b32 s54, v21, 15 ; GFX12-NEXT: v_readlane_b32 s53, v21, 14 ; GFX12-NEXT: v_readlane_b32 s52, v21, 13 ; GFX12-NEXT: v_readlane_b32 s51, v21, 12 ; GFX12-NEXT: v_readlane_b32 s50, v21, 11 ; GFX12-NEXT: v_readlane_b32 s49, v21, 10 ; GFX12-NEXT: v_readlane_b32 s48, v21, 9 -; GFX12-NEXT: v_readlane_b32 s47, v21, 8 -; GFX12-NEXT: v_readlane_b32 s46, v21, 7 +; GFX12-NEXT: v_readlane_b32 s39, v21, 8 +; GFX12-NEXT: v_readlane_b32 s38, v21, 7 ; GFX12-NEXT: v_readlane_b32 s37, v21, 6 ; GFX12-NEXT: v_readlane_b32 s36, v21, 5 ; GFX12-NEXT: v_readlane_b32 s35, v21, 4 @@ -1055,8 +1119,8 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX7-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX7-NEXT: s_mov_b64 exec, s[4:5] -; GFX7-NEXT: v_writelane_b32 v23, s28, 15 -; GFX7-NEXT: v_writelane_b32 v23, s29, 16 +; GFX7-NEXT: v_writelane_b32 v23, s28, 17 +; GFX7-NEXT: v_writelane_b32 v23, s29, 18 ; GFX7-NEXT: v_writelane_b32 v23, s30, 0 ; GFX7-NEXT: v_writelane_b32 v23, s31, 1 ; GFX7-NEXT: v_writelane_b32 v23, s33, 2 @@ -1064,21 +1128,23 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: v_writelane_b32 v23, s35, 4 ; GFX7-NEXT: v_writelane_b32 v23, s36, 5 ; GFX7-NEXT: v_writelane_b32 v23, s37, 6 -; GFX7-NEXT: v_writelane_b32 v23, s46, 7 -; GFX7-NEXT: v_writelane_b32 v23, s47, 8 +; GFX7-NEXT: v_writelane_b32 v23, s38, 7 +; GFX7-NEXT: v_writelane_b32 v23, s39, 8 ; GFX7-NEXT: v_writelane_b32 v23, s48, 9 ; GFX7-NEXT: v_writelane_b32 v23, s49, 10 ; GFX7-NEXT: v_writelane_b32 v23, s50, 11 -; GFX7-NEXT: s_lshr_b32 s5, s32, 6 ; GFX7-NEXT: v_writelane_b32 v23, s51, 12 +; GFX7-NEXT: v_writelane_b32 v23, s52, 13 +; GFX7-NEXT: s_lshr_b32 s5, s32, 6 +; GFX7-NEXT: v_writelane_b32 v23, s53, 14 ; GFX7-NEXT: v_lshr_b32_e64 v0, s32, 6 ; GFX7-NEXT: s_add_i32 s4, s5, 0x4240 ; GFX7-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane -; GFX7-NEXT: v_writelane_b32 v23, s52, 13 +; GFX7-NEXT: v_writelane_b32 v23, s54, 15 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 64, v0 ; GFX7-NEXT: v_writelane_b32 v22, s4, 0 ; GFX7-NEXT: s_and_b64 s[4:5], 0, exec -; GFX7-NEXT: v_writelane_b32 v23, s53, 14 +; GFX7-NEXT: v_writelane_b32 v23, s55, 16 ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use alloca0 v0 ; GFX7-NEXT: ;;#ASMEND @@ -1089,14 +1155,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX7-NEXT: ;;#ASMEND +; GFX7-NEXT: v_readlane_b32 s55, v23, 16 +; GFX7-NEXT: v_readlane_b32 s54, v23, 15 ; GFX7-NEXT: v_readlane_b32 s53, v23, 14 ; GFX7-NEXT: v_readlane_b32 s52, v23, 13 ; GFX7-NEXT: v_readlane_b32 s51, v23, 12 ; GFX7-NEXT: v_readlane_b32 s50, v23, 11 ; GFX7-NEXT: v_readlane_b32 s49, v23, 10 ; GFX7-NEXT: v_readlane_b32 s48, v23, 9 -; GFX7-NEXT: v_readlane_b32 s47, v23, 8 -; GFX7-NEXT: v_readlane_b32 s46, v23, 7 +; GFX7-NEXT: v_readlane_b32 s39, v23, 8 +; GFX7-NEXT: v_readlane_b32 s38, v23, 7 ; GFX7-NEXT: v_readlane_b32 s37, v23, 6 ; GFX7-NEXT: v_readlane_b32 s36, v23, 5 ; GFX7-NEXT: v_readlane_b32 s35, v23, 4 @@ -1104,8 +1172,8 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: v_readlane_b32 s33, v23, 2 ; GFX7-NEXT: v_readlane_b32 s31, v23, 1 ; GFX7-NEXT: v_readlane_b32 s30, v23, 0 -; GFX7-NEXT: v_readlane_b32 s28, v23, 15 -; GFX7-NEXT: v_readlane_b32 s29, v23, 16 +; GFX7-NEXT: v_readlane_b32 s28, v23, 17 +; GFX7-NEXT: v_readlane_b32 s29, v23, 18 ; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX7-NEXT: s_add_i32 s6, s32, 0x201000 ; GFX7-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload @@ -1129,19 +1197,21 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX8-NEXT: v_writelane_b32 v22, s35, 4 ; GFX8-NEXT: v_writelane_b32 v22, s36, 5 ; GFX8-NEXT: v_writelane_b32 v22, s37, 6 -; GFX8-NEXT: v_writelane_b32 v22, s46, 7 -; GFX8-NEXT: v_writelane_b32 v22, s47, 8 +; GFX8-NEXT: v_writelane_b32 v22, s38, 7 +; GFX8-NEXT: v_writelane_b32 v22, s39, 8 ; GFX8-NEXT: v_writelane_b32 v22, s48, 9 ; GFX8-NEXT: v_writelane_b32 v22, s49, 10 ; GFX8-NEXT: v_writelane_b32 v22, s50, 11 -; GFX8-NEXT: s_lshr_b32 s4, s32, 6 ; GFX8-NEXT: v_writelane_b32 v22, s51, 12 +; GFX8-NEXT: v_writelane_b32 v22, s52, 13 +; GFX8-NEXT: s_lshr_b32 s4, s32, 6 +; GFX8-NEXT: v_writelane_b32 v22, s53, 14 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: s_add_i32 s59, s4, 0x4240 -; GFX8-NEXT: v_writelane_b32 v22, s52, 13 +; GFX8-NEXT: v_writelane_b32 v22, s54, 15 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_writelane_b32 v22, s53, 14 +; GFX8-NEXT: v_writelane_b32 v22, s55, 16 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND @@ -1151,14 +1221,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s55, v22, 16 +; GFX8-NEXT: v_readlane_b32 s54, v22, 15 ; GFX8-NEXT: v_readlane_b32 s53, v22, 14 ; GFX8-NEXT: v_readlane_b32 s52, v22, 13 ; GFX8-NEXT: v_readlane_b32 s51, v22, 12 ; GFX8-NEXT: v_readlane_b32 s50, v22, 11 ; GFX8-NEXT: v_readlane_b32 s49, v22, 10 ; GFX8-NEXT: v_readlane_b32 s48, v22, 9 -; GFX8-NEXT: v_readlane_b32 s47, v22, 8 -; GFX8-NEXT: v_readlane_b32 s46, v22, 7 +; GFX8-NEXT: v_readlane_b32 s39, v22, 8 +; GFX8-NEXT: v_readlane_b32 s38, v22, 7 ; GFX8-NEXT: v_readlane_b32 s37, v22, 6 ; GFX8-NEXT: v_readlane_b32 s36, v22, 5 ; GFX8-NEXT: v_readlane_b32 s35, v22, 4 @@ -1187,19 +1259,21 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX900-NEXT: v_writelane_b32 v22, s35, 4 ; GFX900-NEXT: v_writelane_b32 v22, s36, 5 ; GFX900-NEXT: v_writelane_b32 v22, s37, 6 -; GFX900-NEXT: v_writelane_b32 v22, s46, 7 -; GFX900-NEXT: v_writelane_b32 v22, s47, 8 +; GFX900-NEXT: v_writelane_b32 v22, s38, 7 +; GFX900-NEXT: v_writelane_b32 v22, s39, 8 ; GFX900-NEXT: v_writelane_b32 v22, s48, 9 ; GFX900-NEXT: v_writelane_b32 v22, s49, 10 ; GFX900-NEXT: v_writelane_b32 v22, s50, 11 -; GFX900-NEXT: s_lshr_b32 s4, s32, 6 ; GFX900-NEXT: v_writelane_b32 v22, s51, 12 +; GFX900-NEXT: v_writelane_b32 v22, s52, 13 +; GFX900-NEXT: s_lshr_b32 s4, s32, 6 +; GFX900-NEXT: v_writelane_b32 v22, s53, 14 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: s_add_i32 s59, s4, 0x4240 -; GFX900-NEXT: v_writelane_b32 v22, s52, 13 +; GFX900-NEXT: v_writelane_b32 v22, s54, 15 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_writelane_b32 v22, s53, 14 +; GFX900-NEXT: v_writelane_b32 v22, s55, 16 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND @@ -1209,14 +1283,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s55, v22, 16 +; GFX900-NEXT: v_readlane_b32 s54, v22, 15 ; GFX900-NEXT: v_readlane_b32 s53, v22, 14 ; GFX900-NEXT: v_readlane_b32 s52, v22, 13 ; GFX900-NEXT: v_readlane_b32 s51, v22, 12 ; GFX900-NEXT: v_readlane_b32 s50, v22, 11 ; GFX900-NEXT: v_readlane_b32 s49, v22, 10 ; GFX900-NEXT: v_readlane_b32 s48, v22, 9 -; GFX900-NEXT: v_readlane_b32 s47, v22, 8 -; GFX900-NEXT: v_readlane_b32 s46, v22, 7 +; GFX900-NEXT: v_readlane_b32 s39, v22, 8 +; GFX900-NEXT: v_readlane_b32 s38, v22, 7 ; GFX900-NEXT: v_readlane_b32 s37, v22, 6 ; GFX900-NEXT: v_readlane_b32 s36, v22, 5 ; GFX900-NEXT: v_readlane_b32 s35, v22, 4 @@ -1245,16 +1321,18 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX942-NEXT: v_writelane_b32 v22, s35, 4 ; GFX942-NEXT: v_writelane_b32 v22, s36, 5 ; GFX942-NEXT: v_writelane_b32 v22, s37, 6 -; GFX942-NEXT: v_writelane_b32 v22, s46, 7 -; GFX942-NEXT: v_writelane_b32 v22, s47, 8 +; GFX942-NEXT: v_writelane_b32 v22, s38, 7 +; GFX942-NEXT: v_writelane_b32 v22, s39, 8 ; GFX942-NEXT: v_writelane_b32 v22, s48, 9 ; GFX942-NEXT: v_writelane_b32 v22, s49, 10 ; GFX942-NEXT: v_writelane_b32 v22, s50, 11 ; GFX942-NEXT: v_writelane_b32 v22, s51, 12 -; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_writelane_b32 v22, s52, 13 -; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: v_writelane_b32 v22, s53, 14 +; GFX942-NEXT: s_add_i32 s0, s32, 64 +; GFX942-NEXT: v_writelane_b32 v22, s54, 15 +; GFX942-NEXT: v_mov_b32_e32 v0, s0 +; GFX942-NEXT: v_writelane_b32 v22, s55, 16 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND @@ -1266,14 +1344,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s55, v22, 16 +; GFX942-NEXT: v_readlane_b32 s54, v22, 15 ; GFX942-NEXT: v_readlane_b32 s53, v22, 14 ; GFX942-NEXT: v_readlane_b32 s52, v22, 13 ; GFX942-NEXT: v_readlane_b32 s51, v22, 12 ; GFX942-NEXT: v_readlane_b32 s50, v22, 11 ; GFX942-NEXT: v_readlane_b32 s49, v22, 10 ; GFX942-NEXT: v_readlane_b32 s48, v22, 9 -; GFX942-NEXT: v_readlane_b32 s47, v22, 8 -; GFX942-NEXT: v_readlane_b32 s46, v22, 7 +; GFX942-NEXT: v_readlane_b32 s39, v22, 8 +; GFX942-NEXT: v_readlane_b32 s38, v22, 7 ; GFX942-NEXT: v_readlane_b32 s37, v22, 6 ; GFX942-NEXT: v_readlane_b32 s36, v22, 5 ; GFX942-NEXT: v_readlane_b32 s35, v22, 4 @@ -1311,28 +1391,32 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_1-NEXT: v_writelane_b32 v22, s35, 4 ; GFX10_1-NEXT: v_writelane_b32 v22, s36, 5 ; GFX10_1-NEXT: v_writelane_b32 v22, s37, 6 -; GFX10_1-NEXT: v_writelane_b32 v22, s46, 7 -; GFX10_1-NEXT: v_writelane_b32 v22, s47, 8 +; GFX10_1-NEXT: v_writelane_b32 v22, s38, 7 +; GFX10_1-NEXT: v_writelane_b32 v22, s39, 8 ; GFX10_1-NEXT: v_writelane_b32 v22, s48, 9 ; GFX10_1-NEXT: v_writelane_b32 v22, s49, 10 ; GFX10_1-NEXT: v_writelane_b32 v22, s50, 11 ; GFX10_1-NEXT: v_writelane_b32 v22, s51, 12 ; GFX10_1-NEXT: v_writelane_b32 v22, s52, 13 ; GFX10_1-NEXT: v_writelane_b32 v22, s53, 14 +; GFX10_1-NEXT: v_writelane_b32 v22, s54, 15 +; GFX10_1-NEXT: v_writelane_b32 v22, s55, 16 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s55, v22, 16 +; GFX10_1-NEXT: v_readlane_b32 s54, v22, 15 ; GFX10_1-NEXT: v_readlane_b32 s53, v22, 14 ; GFX10_1-NEXT: v_readlane_b32 s52, v22, 13 ; GFX10_1-NEXT: v_readlane_b32 s51, v22, 12 ; GFX10_1-NEXT: v_readlane_b32 s50, v22, 11 ; GFX10_1-NEXT: v_readlane_b32 s49, v22, 10 ; GFX10_1-NEXT: v_readlane_b32 s48, v22, 9 -; GFX10_1-NEXT: v_readlane_b32 s47, v22, 8 -; GFX10_1-NEXT: v_readlane_b32 s46, v22, 7 +; GFX10_1-NEXT: v_readlane_b32 s39, v22, 8 +; GFX10_1-NEXT: v_readlane_b32 s38, v22, 7 ; GFX10_1-NEXT: v_readlane_b32 s37, v22, 6 ; GFX10_1-NEXT: v_readlane_b32 s36, v22, 5 ; GFX10_1-NEXT: v_readlane_b32 s35, v22, 4 @@ -1370,28 +1454,32 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_3-NEXT: v_writelane_b32 v22, s35, 4 ; GFX10_3-NEXT: v_writelane_b32 v22, s36, 5 ; GFX10_3-NEXT: v_writelane_b32 v22, s37, 6 -; GFX10_3-NEXT: v_writelane_b32 v22, s46, 7 -; GFX10_3-NEXT: v_writelane_b32 v22, s47, 8 +; GFX10_3-NEXT: v_writelane_b32 v22, s38, 7 +; GFX10_3-NEXT: v_writelane_b32 v22, s39, 8 ; GFX10_3-NEXT: v_writelane_b32 v22, s48, 9 ; GFX10_3-NEXT: v_writelane_b32 v22, s49, 10 ; GFX10_3-NEXT: v_writelane_b32 v22, s50, 11 ; GFX10_3-NEXT: v_writelane_b32 v22, s51, 12 ; GFX10_3-NEXT: v_writelane_b32 v22, s52, 13 ; GFX10_3-NEXT: v_writelane_b32 v22, s53, 14 +; GFX10_3-NEXT: v_writelane_b32 v22, s54, 15 +; GFX10_3-NEXT: v_writelane_b32 v22, s55, 16 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s55, v22, 16 +; GFX10_3-NEXT: v_readlane_b32 s54, v22, 15 ; GFX10_3-NEXT: v_readlane_b32 s53, v22, 14 ; GFX10_3-NEXT: v_readlane_b32 s52, v22, 13 ; GFX10_3-NEXT: v_readlane_b32 s51, v22, 12 ; GFX10_3-NEXT: v_readlane_b32 s50, v22, 11 ; GFX10_3-NEXT: v_readlane_b32 s49, v22, 10 ; GFX10_3-NEXT: v_readlane_b32 s48, v22, 9 -; GFX10_3-NEXT: v_readlane_b32 s47, v22, 8 -; GFX10_3-NEXT: v_readlane_b32 s46, v22, 7 +; GFX10_3-NEXT: v_readlane_b32 s39, v22, 8 +; GFX10_3-NEXT: v_readlane_b32 s38, v22, 7 ; GFX10_3-NEXT: v_readlane_b32 s37, v22, 6 ; GFX10_3-NEXT: v_readlane_b32 s36, v22, 5 ; GFX10_3-NEXT: v_readlane_b32 s35, v22, 4 @@ -1427,14 +1515,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX11-NEXT: v_writelane_b32 v22, s35, 4 ; GFX11-NEXT: v_writelane_b32 v22, s36, 5 ; GFX11-NEXT: v_writelane_b32 v22, s37, 6 -; GFX11-NEXT: v_writelane_b32 v22, s46, 7 -; GFX11-NEXT: v_writelane_b32 v22, s47, 8 +; GFX11-NEXT: v_writelane_b32 v22, s38, 7 +; GFX11-NEXT: v_writelane_b32 v22, s39, 8 ; GFX11-NEXT: v_writelane_b32 v22, s48, 9 ; GFX11-NEXT: v_writelane_b32 v22, s49, 10 ; GFX11-NEXT: v_writelane_b32 v22, s50, 11 ; GFX11-NEXT: v_writelane_b32 v22, s51, 12 ; GFX11-NEXT: v_writelane_b32 v22, s52, 13 ; GFX11-NEXT: v_writelane_b32 v22, s53, 14 +; GFX11-NEXT: v_writelane_b32 v22, s54, 15 +; GFX11-NEXT: v_writelane_b32 v22, s55, 16 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX11-NEXT: ;;#ASMEND @@ -1442,14 +1532,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_readlane_b32 s55, v22, 16 +; GFX11-NEXT: v_readlane_b32 s54, v22, 15 ; GFX11-NEXT: v_readlane_b32 s53, v22, 14 ; GFX11-NEXT: v_readlane_b32 s52, v22, 13 ; GFX11-NEXT: v_readlane_b32 s51, v22, 12 ; GFX11-NEXT: v_readlane_b32 s50, v22, 11 ; GFX11-NEXT: v_readlane_b32 s49, v22, 10 ; GFX11-NEXT: v_readlane_b32 s48, v22, 9 -; GFX11-NEXT: v_readlane_b32 s47, v22, 8 -; GFX11-NEXT: v_readlane_b32 s46, v22, 7 +; GFX11-NEXT: v_readlane_b32 s39, v22, 8 +; GFX11-NEXT: v_readlane_b32 s38, v22, 7 ; GFX11-NEXT: v_readlane_b32 s37, v22, 6 ; GFX11-NEXT: v_readlane_b32 s36, v22, 5 ; GFX11-NEXT: v_readlane_b32 s35, v22, 4 @@ -1488,14 +1580,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX12-NEXT: v_writelane_b32 v22, s35, 4 ; GFX12-NEXT: v_writelane_b32 v22, s36, 5 ; GFX12-NEXT: v_writelane_b32 v22, s37, 6 -; GFX12-NEXT: v_writelane_b32 v22, s46, 7 -; GFX12-NEXT: v_writelane_b32 v22, s47, 8 +; GFX12-NEXT: v_writelane_b32 v22, s38, 7 +; GFX12-NEXT: v_writelane_b32 v22, s39, 8 ; GFX12-NEXT: v_writelane_b32 v22, s48, 9 ; GFX12-NEXT: v_writelane_b32 v22, s49, 10 ; GFX12-NEXT: v_writelane_b32 v22, s50, 11 ; GFX12-NEXT: v_writelane_b32 v22, s51, 12 ; GFX12-NEXT: v_writelane_b32 v22, s52, 13 ; GFX12-NEXT: v_writelane_b32 v22, s53, 14 +; GFX12-NEXT: v_writelane_b32 v22, s54, 15 +; GFX12-NEXT: v_writelane_b32 v22, s55, 16 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX12-NEXT: ;;#ASMEND @@ -1503,14 +1597,16 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_readlane_b32 s55, v22, 16 +; GFX12-NEXT: v_readlane_b32 s54, v22, 15 ; GFX12-NEXT: v_readlane_b32 s53, v22, 14 ; GFX12-NEXT: v_readlane_b32 s52, v22, 13 ; GFX12-NEXT: v_readlane_b32 s51, v22, 12 ; GFX12-NEXT: v_readlane_b32 s50, v22, 11 ; GFX12-NEXT: v_readlane_b32 s49, v22, 10 ; GFX12-NEXT: v_readlane_b32 s48, v22, 9 -; GFX12-NEXT: v_readlane_b32 s47, v22, 8 -; GFX12-NEXT: v_readlane_b32 s46, v22, 7 +; GFX12-NEXT: v_readlane_b32 s39, v22, 8 +; GFX12-NEXT: v_readlane_b32 s38, v22, 7 ; GFX12-NEXT: v_readlane_b32 s37, v22, 6 ; GFX12-NEXT: v_readlane_b32 s36, v22, 5 ; GFX12-NEXT: v_readlane_b32 s35, v22, 4 diff --git a/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll b/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll index 790b934c2b1bf..52f380b7f80a3 100644 --- a/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll +++ b/llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll @@ -8,7 +8,7 @@ ; CHECK-LABEL: I_Quit: ; CHECK: .set I_Quit.num_vgpr, max(41, amdgpu.max_num_vgpr) ; CHECK: .set I_Quit.num_agpr, max(0, amdgpu.max_num_agpr) -; CHECK: .set I_Quit.numbered_sgpr, max(64, amdgpu.max_num_sgpr) +; CHECK: .set I_Quit.numbered_sgpr, max(56, amdgpu.max_num_sgpr) ; CHECK: .set I_Quit.private_seg_size, 16 ; CHECK: .set I_Quit.uses_vcc, 1 ; CHECK: .set I_Quit.uses_flat_scratch, 1 @@ -80,7 +80,7 @@ define void @P_SetThingPosition() { ; CHECK-LABEL: P_SetupPsprites: ; CHECK: .set P_SetupPsprites.num_vgpr, max(41, amdgpu.max_num_vgpr) ; CHECK: .set P_SetupPsprites.num_agpr, max(0, amdgpu.max_num_agpr) -; CHECK: .set P_SetupPsprites.numbered_sgpr, max(64, amdgpu.max_num_sgpr) +; CHECK: .set P_SetupPsprites.numbered_sgpr, max(56, amdgpu.max_num_sgpr) ; CHECK: .set P_SetupPsprites.private_seg_size, 16 ; CHECK: .set P_SetupPsprites.uses_vcc, 1 ; CHECK: .set P_SetupPsprites.uses_flat_scratch, 1 @@ -128,7 +128,7 @@ define void @P_SpawnPlayer() { ; CHECK-LABEL: I_Error: ; CHECK: .set I_Error.num_vgpr, max(41, amdgpu.max_num_vgpr) ; CHECK: .set I_Error.num_agpr, max(0, amdgpu.max_num_agpr) -; CHECK: .set I_Error.numbered_sgpr, max(64, amdgpu.max_num_sgpr) +; CHECK: .set I_Error.numbered_sgpr, max(56, amdgpu.max_num_sgpr) ; CHECK: .set I_Error.private_seg_size, 16 ; CHECK: .set I_Error.uses_vcc, 1 ; CHECK: .set I_Error.uses_flat_scratch, 1 @@ -264,7 +264,7 @@ define ptr @P_SaveGameFile() { ; CHECK-LABEL: R_FlatNumForName: ; CHECK: .set R_FlatNumForName.num_vgpr, max(42, I_Error.num_vgpr) ; CHECK: .set R_FlatNumForName.num_agpr, max(0, I_Error.num_agpr) -; CHECK: .set R_FlatNumForName.numbered_sgpr, max(64, I_Error.numbered_sgpr) +; CHECK: .set R_FlatNumForName.numbered_sgpr, max(56, I_Error.numbered_sgpr) ; CHECK: .set R_FlatNumForName.private_seg_size, 16+(max(I_Error.private_seg_size)) ; CHECK: .set R_FlatNumForName.uses_vcc, or(1, I_Error.uses_vcc) ; CHECK: .set R_FlatNumForName.uses_flat_scratch, or(0, I_Error.uses_flat_scratch) @@ -279,7 +279,7 @@ define i32 @R_FlatNumForName() { ; CHECK-LABEL: R_TextureNumForName: ; CHECK: .set R_TextureNumForName.num_vgpr, max(42, R_FlatNumForName.num_vgpr) ; CHECK: .set R_TextureNumForName.num_agpr, max(0, R_FlatNumForName.num_agpr) -; CHECK: .set R_TextureNumForName.numbered_sgpr, max(64, R_FlatNumForName.numbered_sgpr) +; CHECK: .set R_TextureNumForName.numbered_sgpr, max(56, R_FlatNumForName.numbered_sgpr) ; CHECK: .set R_TextureNumForName.private_seg_size, 16+(max(R_FlatNumForName.private_seg_size)) ; CHECK: .set R_TextureNumForName.uses_vcc, or(1, R_FlatNumForName.uses_vcc) ; CHECK: .set R_TextureNumForName.uses_flat_scratch, or(0, R_FlatNumForName.uses_flat_scratch) diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir index 8e957c1c31013..05cbd4c2a010d 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -27,25 +27,25 @@ body: | liveins: $vgpr1 ; CHECK-LABEL: name: scavenge_sgpr_pei_no_sgprs - ; CHECK: liveins: $sgpr38, $sgpr39, $vgpr1 + ; CHECK: liveins: $sgpr40, $sgpr41, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $sgpr38 = frame-setup COPY $sgpr33 + ; CHECK-NEXT: $sgpr40 = frame-setup COPY $sgpr33 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc - ; CHECK-NEXT: $sgpr39 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: $sgpr41 = frame-setup COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; CHECK-NEXT: $sgpr40 = S_MOV_B32 8192 - ; CHECK-NEXT: $vgpr0, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr0, 0, implicit $exec + ; CHECK-NEXT: $sgpr42 = S_MOV_B32 8192 + ; CHECK-NEXT: $vgpr0, dead $sgpr42_sgpr43 = V_ADD_CO_U32_e64 killed $sgpr42, killed $vgpr0, 0, implicit $exec ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; CHECK-NEXT: $sgpr40 = S_MOV_B32 16384 - ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec + ; CHECK-NEXT: $sgpr42 = S_MOV_B32 16384 + ; CHECK-NEXT: $vgpr2, dead $sgpr42_sgpr43 = V_ADD_CO_U32_e64 killed $sgpr42, killed $vgpr2, 0, implicit $exec ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr39 - ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr38 + ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec @@ -75,24 +75,24 @@ body: | liveins: $vgpr1 ; CHECK-LABEL: name: scavenge_sgpr_pei_one_sgpr - ; CHECK: liveins: $sgpr29, $sgpr38, $vgpr1 + ; CHECK: liveins: $sgpr29, $sgpr40, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sgpr29 = frame-setup COPY $sgpr33 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc - ; CHECK-NEXT: $sgpr38 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: $sgpr40 = frame-setup COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; CHECK-NEXT: $sgpr40 = S_MOV_B32 8192 - ; CHECK-NEXT: $vgpr0, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr0, 0, implicit $exec + ; CHECK-NEXT: $sgpr42 = S_MOV_B32 8192 + ; CHECK-NEXT: $vgpr0, dead $sgpr42_sgpr43 = V_ADD_CO_U32_e64 killed $sgpr42, killed $vgpr0, 0, implicit $exec ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; CHECK-NEXT: $sgpr40 = S_MOV_B32 16384 - ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec + ; CHECK-NEXT: $sgpr42 = S_MOV_B32 16384 + ; CHECK-NEXT: $vgpr2, dead $sgpr42_sgpr43 = V_ADD_CO_U32_e64 killed $sgpr42, killed $vgpr2, 0, implicit $exec ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr38 + ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr40 ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr29 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -133,11 +133,11 @@ body: | ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; CHECK-NEXT: $sgpr38 = S_MOV_B32 8192 - ; CHECK-NEXT: $vgpr0, dead $sgpr38_sgpr39 = V_ADD_CO_U32_e64 killed $sgpr38, killed $vgpr0, 0, implicit $exec + ; CHECK-NEXT: $sgpr40 = S_MOV_B32 8192 + ; CHECK-NEXT: $vgpr0, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr0, 0, implicit $exec ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; CHECK-NEXT: $sgpr38 = S_MOV_B32 16384 - ; CHECK-NEXT: $vgpr2, dead $sgpr38_sgpr39 = V_ADD_CO_U32_e64 killed $sgpr38, killed $vgpr2, 0, implicit $exec + ; CHECK-NEXT: $sgpr40 = S_MOV_B32 16384 + ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr29 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir index 88556040486e2..4f1c9a20fddc3 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir @@ -23,12 +23,12 @@ body: | liveins: $vgpr1 ; MUBUF-LABEL: name: scavenge_sgpr_pei_no_sgprs - ; MUBUF: liveins: $sgpr38, $sgpr39, $vgpr1 + ; MUBUF: liveins: $sgpr40, $sgpr41, $vgpr1 ; MUBUF-NEXT: {{ $}} - ; MUBUF-NEXT: $sgpr38 = frame-setup COPY $sgpr33 + ; MUBUF-NEXT: $sgpr40 = frame-setup COPY $sgpr33 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc - ; MUBUF-NEXT: $sgpr39 = frame-setup COPY $sgpr34 + ; MUBUF-NEXT: $sgpr41 = frame-setup COPY $sgpr34 ; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc ; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc @@ -38,27 +38,27 @@ body: | ; MUBUF-NEXT: $vgpr2 = V_ADD_U32_e32 16384, killed $vgpr2, implicit $exec ; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr39 - ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr38 + ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; MUBUF-NEXT: S_ENDPGM 0, implicit $vcc ; ; FLATSCR-LABEL: name: scavenge_sgpr_pei_no_sgprs - ; FLATSCR: liveins: $sgpr38, $sgpr39, $vgpr1 + ; FLATSCR: liveins: $sgpr40, $sgpr41, $vgpr1 ; FLATSCR-NEXT: {{ $}} - ; FLATSCR-NEXT: $sgpr38 = frame-setup COPY $sgpr33 + ; FLATSCR-NEXT: $sgpr40 = frame-setup COPY $sgpr33 ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc - ; FLATSCR-NEXT: $sgpr39 = frame-setup COPY $sgpr34 + ; FLATSCR-NEXT: $sgpr41 = frame-setup COPY $sgpr34 ; FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc ; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc - ; FLATSCR-NEXT: $sgpr40 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc - ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr40, implicit $exec - ; FLATSCR-NEXT: $sgpr40 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc - ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr40, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 + ; FLATSCR-NEXT: $sgpr42 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc + ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr42, implicit $exec + ; FLATSCR-NEXT: $sgpr42 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc + ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr42, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; FLATSCR-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr39 - ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr38 + ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; FLATSCR-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir index 1242e23db6c6a..480859a09a347 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir @@ -22,22 +22,22 @@ body: | liveins: $vgpr1 ; CHECK-LABEL: name: scavenge_sgpr_pei - ; CHECK: liveins: $sgpr38, $sgpr39, $vgpr1 + ; CHECK: liveins: $sgpr40, $sgpr41, $vgpr1 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $sgpr38 = frame-setup COPY $sgpr33 + ; CHECK-NEXT: $sgpr40 = frame-setup COPY $sgpr33 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc ; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc - ; CHECK-NEXT: $sgpr39 = frame-setup COPY $sgpr34 + ; CHECK-NEXT: $sgpr41 = frame-setup COPY $sgpr34 ; CHECK-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 786432, implicit-def dead $scc ; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; CHECK-NEXT: $sgpr40 = S_MOV_B32 4096 - ; CHECK-NEXT: $vgpr2, dead $sgpr40_sgpr41 = V_ADD_CO_U32_e64 killed $sgpr40, killed $vgpr2, 0, implicit $exec + ; CHECK-NEXT: $sgpr42 = S_MOV_B32 4096 + ; CHECK-NEXT: $vgpr2, dead $sgpr42_sgpr43 = V_ADD_CO_U32_e64 killed $sgpr42, killed $vgpr2, 0, implicit $exec ; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; CHECK-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr39 - ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr38 + ; CHECK-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; CHECK-NEXT: S_ENDPGM 0, implicit $vcc S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc $vgpr0 = V_OR_B32_e32 %stack.0, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll index 9be182a767685..ee89bf406c2a3 100644 --- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll +++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll @@ -10,20 +10,20 @@ declare i64 @_Z13get_global_idj(i32) #0 define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; GFX8-LABEL: clmem_read_simplified: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s50, -1 -; GFX8-NEXT: s_mov_b32 s51, 0xe80000 -; GFX8-NEXT: s_add_u32 s48, s48, s11 -; GFX8-NEXT: s_addc_u32 s49, s49, 0 +; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s38, -1 +; GFX8-NEXT: s_mov_b32 s39, 0xe80000 +; GFX8-NEXT: s_add_u32 s36, s36, s11 +; GFX8-NEXT: s_addc_u32 s37, s37, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -91,20 +91,20 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; ; GFX9-LABEL: clmem_read_simplified: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -161,12 +161,12 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; ; GFX10-LABEL: clmem_read_simplified: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s50, -1 -; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX10-NEXT: s_add_u32 s48, s48, s11 -; GFX10-NEXT: s_addc_u32 s49, s49, 0 +; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s38, -1 +; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX10-NEXT: s_add_u32 s36, s36, s11 +; GFX10-NEXT: s_addc_u32 s37, s37, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -174,8 +174,8 @@ define amdgpu_kernel void @clmem_read_simplified(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] @@ -342,20 +342,20 @@ entry: define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX8-LABEL: clmem_read: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s50, -1 -; GFX8-NEXT: s_mov_b32 s51, 0xe80000 -; GFX8-NEXT: s_add_u32 s48, s48, s11 -; GFX8-NEXT: s_addc_u32 s49, s49, 0 +; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s38, -1 +; GFX8-NEXT: s_mov_b32 s39, 0xe80000 +; GFX8-NEXT: s_add_u32 s36, s36, s11 +; GFX8-NEXT: s_addc_u32 s37, s37, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -469,20 +469,20 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; ; GFX900-LABEL: clmem_read: ; GFX900: ; %bb.0: ; %entry -; GFX900-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX900-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX900-NEXT: s_mov_b32 s50, -1 -; GFX900-NEXT: s_mov_b32 s51, 0xe00000 -; GFX900-NEXT: s_add_u32 s48, s48, s11 -; GFX900-NEXT: s_addc_u32 s49, s49, 0 +; GFX900-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX900-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX900-NEXT: s_mov_b32 s38, -1 +; GFX900-NEXT: s_mov_b32 s39, 0xe00000 +; GFX900-NEXT: s_add_u32 s36, s36, s11 +; GFX900-NEXT: s_addc_u32 s37, s37, 0 ; GFX900-NEXT: s_getpc_b64 s[0:1] ; GFX900-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX900-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX900-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX900-NEXT: v_mov_b32_e32 v31, v0 ; GFX900-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX900-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX900-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX900-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX900-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX900-NEXT: v_mov_b32_e32 v0, 0 ; GFX900-NEXT: s_mov_b32 s32, 0 ; GFX900-NEXT: s_waitcnt lgkmcnt(0) @@ -586,12 +586,12 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; ; GFX10-LABEL: clmem_read: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s50, -1 -; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX10-NEXT: s_add_u32 s48, s48, s11 -; GFX10-NEXT: s_addc_u32 s49, s49, 0 +; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s38, -1 +; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX10-NEXT: s_add_u32 s36, s36, s11 +; GFX10-NEXT: s_addc_u32 s37, s37, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -599,8 +599,8 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] @@ -698,20 +698,20 @@ define hidden amdgpu_kernel void @clmem_read(ptr addrspace(1) %buffer) { ; ; GFX90A-LABEL: clmem_read: ; GFX90A: ; %bb.0: ; %entry -; GFX90A-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX90A-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX90A-NEXT: s_mov_b32 s50, -1 -; GFX90A-NEXT: s_mov_b32 s51, 0xe00000 -; GFX90A-NEXT: s_add_u32 s48, s48, s11 -; GFX90A-NEXT: s_addc_u32 s49, s49, 0 +; GFX90A-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX90A-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX90A-NEXT: s_mov_b32 s38, -1 +; GFX90A-NEXT: s_mov_b32 s39, 0xe00000 +; GFX90A-NEXT: s_add_u32 s36, s36, s11 +; GFX90A-NEXT: s_addc_u32 s37, s37, 0 ; GFX90A-NEXT: s_getpc_b64 s[0:1] ; GFX90A-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX90A-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX90A-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX90A-NEXT: v_mov_b32_e32 v31, v0 ; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX90A-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX90A-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX90A-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX90A-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX90A-NEXT: v_mov_b32_e32 v0, 0 ; GFX90A-NEXT: s_mov_b32 s32, 0 ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) @@ -1030,20 +1030,20 @@ while.end: ; preds = %while.cond.loopexit define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; GFX8-LABEL: Address32: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s50, -1 -; GFX8-NEXT: s_mov_b32 s51, 0xe80000 -; GFX8-NEXT: s_add_u32 s48, s48, s11 -; GFX8-NEXT: s_addc_u32 s49, s49, 0 +; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s38, -1 +; GFX8-NEXT: s_mov_b32 s39, 0xe80000 +; GFX8-NEXT: s_add_u32 s36, s36, s11 +; GFX8-NEXT: s_addc_u32 s37, s37, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -1116,20 +1116,20 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; ; GFX9-LABEL: Address32: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1173,12 +1173,12 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; ; GFX10-LABEL: Address32: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s50, -1 -; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX10-NEXT: s_add_u32 s48, s48, s11 -; GFX10-NEXT: s_addc_u32 s49, s49, 0 +; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s38, -1 +; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX10-NEXT: s_add_u32 s36, s36, s11 +; GFX10-NEXT: s_addc_u32 s37, s37, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -1186,8 +1186,8 @@ define amdgpu_kernel void @Address32(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] @@ -1345,20 +1345,20 @@ entry: define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) { ; GFX8-LABEL: Offset64: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s50, -1 -; GFX8-NEXT: s_mov_b32 s51, 0xe80000 -; GFX8-NEXT: s_add_u32 s48, s48, s11 -; GFX8-NEXT: s_addc_u32 s49, s49, 0 +; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s38, -1 +; GFX8-NEXT: s_mov_b32 s39, 0xe80000 +; GFX8-NEXT: s_add_u32 s36, s36, s11 +; GFX8-NEXT: s_addc_u32 s37, s37, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -1397,20 +1397,20 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) { ; ; GFX9-LABEL: Offset64: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1446,12 +1446,12 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) { ; ; GFX10-LABEL: Offset64: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s50, -1 -; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX10-NEXT: s_add_u32 s48, s48, s11 -; GFX10-NEXT: s_addc_u32 s49, s49, 0 +; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s38, -1 +; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX10-NEXT: s_add_u32 s36, s36, s11 +; GFX10-NEXT: s_addc_u32 s37, s37, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -1459,8 +1459,8 @@ define amdgpu_kernel void @Offset64(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] @@ -1569,20 +1569,20 @@ entry: define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) { ; GFX8-LABEL: p32Offset64: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s50, -1 -; GFX8-NEXT: s_mov_b32 s51, 0xe80000 -; GFX8-NEXT: s_add_u32 s48, s48, s11 -; GFX8-NEXT: s_addc_u32 s49, s49, 0 +; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s38, -1 +; GFX8-NEXT: s_mov_b32 s39, 0xe80000 +; GFX8-NEXT: s_add_u32 s36, s36, s11 +; GFX8-NEXT: s_addc_u32 s37, s37, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -1619,20 +1619,20 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) { ; ; GFX9-LABEL: p32Offset64: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -1664,12 +1664,12 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) { ; ; GFX10-LABEL: p32Offset64: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s50, -1 -; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX10-NEXT: s_add_u32 s48, s48, s11 -; GFX10-NEXT: s_addc_u32 s49, s49, 0 +; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s38, -1 +; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX10-NEXT: s_add_u32 s36, s36, s11 +; GFX10-NEXT: s_addc_u32 s37, s37, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -1677,8 +1677,8 @@ define amdgpu_kernel void @p32Offset64(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] @@ -1776,31 +1776,31 @@ entry: define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX8-LABEL: DiffBase: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s66, -1 -; GFX8-NEXT: s_mov_b32 s67, 0xe80000 -; GFX8-NEXT: s_add_u32 s64, s64, s11 -; GFX8-NEXT: s_addc_u32 s65, s65, 0 +; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s50, -1 +; GFX8-NEXT: s_mov_b32 s51, 0xe80000 +; GFX8-NEXT: s_add_u32 s48, s48, s11 +; GFX8-NEXT: s_addc_u32 s49, s49, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 -; GFX8-NEXT: s_load_dwordx4 s[48:51], s[4:5], 0x24 +; GFX8-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[64:65] +; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 7, v0 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffff8000, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s49 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s48, v2 +; GFX8-NEXT: v_mov_b32_e32 v1, s37 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, s36, v2 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX8-NEXT: v_mov_b32_e32 v3, s51 -; GFX8-NEXT: v_add_u32_e32 v12, vcc, s50, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, s39 +; GFX8-NEXT: v_add_u32_e32 v12, vcc, s38, v2 ; GFX8-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x1000, v0 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc @@ -1839,31 +1839,31 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; ; GFX9-LABEL: DiffBase: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s66, -1 -; GFX9-NEXT: s_mov_b32 s67, 0xe00000 -; GFX9-NEXT: s_add_u32 s64, s64, s11 -; GFX9-NEXT: s_addc_u32 s65, s65, 0 +; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s50, -1 +; GFX9-NEXT: s_mov_b32 s51, 0xe00000 +; GFX9-NEXT: s_add_u32 s48, s48, s11 +; GFX9-NEXT: s_addc_u32 s49, s49, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx4 s[48:51], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX9-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 7, v0 ; GFX9-NEXT: v_and_b32_e32 v16, 0xffff8000, v0 -; GFX9-NEXT: v_mov_b32_e32 v0, s49 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s48, v16 +; GFX9-NEXT: v_mov_b32_e32 v0, s37 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, s36, v16 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v0, vcc -; GFX9-NEXT: v_mov_b32_e32 v0, s51 -; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, s50, v16 +; GFX9-NEXT: v_mov_b32_e32 v0, s39 +; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, s38, v16 ; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v0, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc @@ -1893,35 +1893,35 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v15, v3, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-NEXT: global_store_dwordx2 v16, v[0:1], s[48:49] +; GFX9-NEXT: global_store_dwordx2 v16, v[0:1], s[36:37] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: DiffBase: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s64, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s65, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s66, -1 -; GFX10-NEXT: s_mov_b32 s67, 0x31c16000 -; GFX10-NEXT: s_add_u32 s64, s64, s11 -; GFX10-NEXT: s_addc_u32 s65, s65, 0 +; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s50, -1 +; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 +; GFX10-NEXT: s_add_u32 s48, s48, s11 +; GFX10-NEXT: s_addc_u32 s49, s49, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX10-NEXT: v_mov_b32_e32 v31, v0 ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 -; GFX10-NEXT: s_load_dwordx4 s[48:51], s[4:5], 0x24 +; GFX10-NEXT: s_load_dwordx4 s[36:39], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[64:65] -; GFX10-NEXT: s_mov_b64 s[2:3], s[66:67] +; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 7, v0 ; GFX10-NEXT: v_and_b32_e32 v16, 0xffff8000, v0 -; GFX10-NEXT: v_add_co_u32 v8, s0, s48, v16 -; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s0, s49, 0, s0 -; GFX10-NEXT: v_add_co_u32 v12, s0, s50, v16 -; GFX10-NEXT: v_add_co_ci_u32_e64 v13, s0, s51, 0, s0 +; GFX10-NEXT: v_add_co_u32 v8, s0, s36, v16 +; GFX10-NEXT: v_add_co_ci_u32_e64 v9, s0, s37, 0, s0 +; GFX10-NEXT: v_add_co_u32 v12, s0, s38, v16 +; GFX10-NEXT: v_add_co_ci_u32_e64 v13, s0, s39, 0, s0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v8, 0x1800 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v9, vcc_lo ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v12, 0x3000 @@ -1952,7 +1952,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v15, v3, vcc_lo ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-NEXT: global_store_dwordx2 v16, v[0:1], s[48:49] +; GFX10-NEXT: global_store_dwordx2 v16, v[0:1], s[36:37] ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: DiffBase: @@ -1962,21 +1962,21 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX11-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX11-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, 0 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 -; GFX11-NEXT: s_load_b128 s[48:51], s[4:5], 0x24 +; GFX11-NEXT: s_load_b128 s[36:39], s[4:5], 0x24 ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 7, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v12, 0xffff8000, v0 -; GFX11-NEXT: v_add_co_u32 v2, s0, s48, v12 +; GFX11-NEXT: v_add_co_u32 v2, s0, s36, v12 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, s49, 0, s0 -; GFX11-NEXT: v_add_co_u32 v8, s0, s50, v12 +; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, s37, 0, s0 +; GFX11-NEXT: v_add_co_u32 v8, s0, s38, v12 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_4) ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo -; GFX11-NEXT: v_add_co_ci_u32_e64 v9, null, s51, 0, s0 +; GFX11-NEXT: v_add_co_ci_u32_e64 v9, null, s39, 0, s0 ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, 0x2000 ; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo ; GFX11-NEXT: v_add_co_u32 v4, vcc_lo, 0x2000, v8 @@ -2005,7 +2005,7 @@ define amdgpu_kernel void @DiffBase(ptr addrspace(1) %buffer1, ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX11-NEXT: global_store_b64 v12, v[0:1], s[48:49] +; GFX11-NEXT: global_store_b64 v12, v[0:1], s[36:37] ; GFX11-NEXT: s_endpgm ptr addrspace(1) %buffer2) { entry: @@ -2046,20 +2046,20 @@ entry: define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; GFX8-LABEL: ReverseOrder: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s50, -1 -; GFX8-NEXT: s_mov_b32 s51, 0xe80000 -; GFX8-NEXT: s_add_u32 s48, s48, s11 -; GFX8-NEXT: s_addc_u32 s49, s49, 0 +; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s38, -1 +; GFX8-NEXT: s_mov_b32 s39, 0xe80000 +; GFX8-NEXT: s_add_u32 s36, s36, s11 +; GFX8-NEXT: s_addc_u32 s37, s37, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -2127,20 +2127,20 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; ; GFX9-LABEL: ReverseOrder: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -2196,12 +2196,12 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; ; GFX10-LABEL: ReverseOrder: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s50, -1 -; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX10-NEXT: s_add_u32 s48, s48, s11 -; GFX10-NEXT: s_addc_u32 s49, s49, 0 +; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s38, -1 +; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX10-NEXT: s_add_u32 s36, s36, s11 +; GFX10-NEXT: s_addc_u32 s37, s37, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -2209,8 +2209,8 @@ define amdgpu_kernel void @ReverseOrder(ptr addrspace(1) %buffer) { ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] @@ -2382,20 +2382,20 @@ entry: define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buffer) { ; GFX8-LABEL: negativeoffset: ; GFX8: ; %bb.0: ; %entry -; GFX8-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX8-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX8-NEXT: s_mov_b32 s50, -1 -; GFX8-NEXT: s_mov_b32 s51, 0xe80000 -; GFX8-NEXT: s_add_u32 s48, s48, s11 -; GFX8-NEXT: s_addc_u32 s49, s49, 0 +; GFX8-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX8-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX8-NEXT: s_mov_b32 s38, -1 +; GFX8-NEXT: s_mov_b32 s39, 0xe80000 +; GFX8-NEXT: s_add_u32 s36, s36, s11 +; GFX8-NEXT: s_addc_u32 s37, s37, 0 ; GFX8-NEXT: s_getpc_b64 s[0:1] ; GFX8-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX8-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX8-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX8-NEXT: s_mov_b64 s[0:1], s[48:49] +; GFX8-NEXT: s_mov_b64 s[0:1], s[36:37] ; GFX8-NEXT: v_mov_b32_e32 v31, v0 -; GFX8-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX8-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: s_mov_b32 s32, 0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) @@ -2423,20 +2423,20 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf ; ; GFX9-LABEL: negativeoffset: ; GFX9: ; %bb.0: ; %entry -; GFX9-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX9-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX9-NEXT: s_mov_b32 s50, -1 -; GFX9-NEXT: s_mov_b32 s51, 0xe00000 -; GFX9-NEXT: s_add_u32 s48, s48, s11 -; GFX9-NEXT: s_addc_u32 s49, s49, 0 +; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX9-NEXT: s_mov_b32 s38, -1 +; GFX9-NEXT: s_mov_b32 s39, 0xe00000 +; GFX9-NEXT: s_add_u32 s36, s36, s11 +; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_getpc_b64 s[0:1] ; GFX9-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX9-NEXT: v_mov_b32_e32 v31, v0 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 -; GFX9-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX9-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s32, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -2463,12 +2463,12 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf ; ; GFX10-LABEL: negativeoffset: ; GFX10: ; %bb.0: ; %entry -; GFX10-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GFX10-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GFX10-NEXT: s_mov_b32 s50, -1 -; GFX10-NEXT: s_mov_b32 s51, 0x31c16000 -; GFX10-NEXT: s_add_u32 s48, s48, s11 -; GFX10-NEXT: s_addc_u32 s49, s49, 0 +; GFX10-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GFX10-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GFX10-NEXT: s_mov_b32 s38, -1 +; GFX10-NEXT: s_mov_b32 s39, 0x31c16000 +; GFX10-NEXT: s_add_u32 s36, s36, s11 +; GFX10-NEXT: s_addc_u32 s37, s37, 0 ; GFX10-NEXT: s_getpc_b64 s[0:1] ; GFX10-NEXT: s_add_u32 s0, s0, _Z13get_global_idj@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s1, s1, _Z13get_global_idj@gotpcrel32@hi+12 @@ -2476,8 +2476,8 @@ define hidden amdgpu_kernel void @negativeoffset(ptr addrspace(1) nocapture %buf ; GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 ; GFX10-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] -; GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] +; GFX10-NEXT: s_mov_b64 s[0:1], s[36:37] +; GFX10-NEXT: s_mov_b64 s[2:3], s[38:39] ; GFX10-NEXT: s_mov_b32 s32, 0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir index 814674804df57..4a0bb6ceccd3f 100644 --- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir +++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir @@ -42,16 +42,16 @@ body: | ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr18_sgpr19 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: renamable $sgpr100_sgpr101 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5) ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.3, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.3, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr34_sgpr35 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec ; CHECK-NEXT: renamable $sgpr56 = S_MOV_B32 0 ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5) + ; CHECK-NEXT: SI_SPILL_S64_SAVE killed renamable $sgpr12_sgpr13, %stack.4, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.4, align 4, addrspace 5) ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit $exec - ; CHECK-NEXT: renamable $sgpr34_sgpr35 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr100_sgpr101 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec ; CHECK-NEXT: renamable $sgpr57 = S_MOV_B32 1083786240 ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5) ; CHECK-NEXT: S_BRANCH %bb.1 @@ -60,7 +60,7 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.17(0x40000000) ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr34_sgpr35, implicit-def dead $scc + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr100_sgpr101, implicit-def dead $scc ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_1024_align2 = COPY [[COPY]] ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.17 @@ -70,10 +70,6 @@ body: | ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr72 - ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr72 - ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr72 - ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr72 ; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr72 ; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr72 ; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr72 @@ -81,36 +77,40 @@ body: | ; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr72 ; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr72 ; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr72 - ; CHECK-NEXT: renamable $sgpr47 = COPY killed renamable $sgpr72 - ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47 - ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr80 = COPY killed renamable $sgpr52 - ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47 = COPY killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79 - ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr80 - ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr49 = COPY killed renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr50 = COPY killed renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr51 = COPY killed renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr52 = COPY killed renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr53 = COPY killed renamable $sgpr76 - ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr54 = COPY killed renamable $sgpr72 - ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr55 = COPY killed renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr47 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr49 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr50 = COPY renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr51 = COPY killed renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 = COPY killed renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) ; CHECK-NEXT: renamable $sgpr56 = COPY killed renamable $sgpr72 ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr76 - ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr58 = COPY killed renamable $sgpr76 - ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr59 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 + ; CHECK-NEXT: renamable $sgpr52 = COPY renamable $sgpr56 + ; CHECK-NEXT: renamable $sgpr53 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr56_sgpr57 = COPY renamable $sgpr52_sgpr53 + ; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 = COPY killed renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 + ; CHECK-NEXT: renamable $sgpr52_sgpr53 = COPY renamable $sgpr56_sgpr57 + ; CHECK-NEXT: renamable $sgpr54 = COPY killed renamable $sgpr76 + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47 = COPY killed renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 + ; CHECK-NEXT: renamable $sgpr48_sgpr49_sgpr50 = COPY renamable $sgpr52_sgpr53_sgpr54 + ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54 = COPY renamable $sgpr48_sgpr49_sgpr50 + ; CHECK-NEXT: renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47 + ; CHECK-NEXT: renamable $sgpr55 = COPY killed renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr56 = COPY killed renamable $sgpr72 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr58 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr59 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr84 ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) ; CHECK-NEXT: renamable $sgpr61 = COPY killed renamable $sgpr80 ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) @@ -125,6 +125,17 @@ body: | ; CHECK-NEXT: renamable $sgpr66 = COPY killed renamable $sgpr84 ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) ; CHECK-NEXT: renamable $sgpr67 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr68 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 + ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 = COPY renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 + ; CHECK-NEXT: renamable $sgpr64 = COPY renamable $sgpr68 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr65 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr66 = COPY killed renamable $sgpr84 + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr67 = COPY killed renamable $sgpr84 ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 @@ -157,21 +168,20 @@ body: | ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.3, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr12_sgpr13 = S_AND_B64 killed renamable $sgpr12_sgpr13, undef renamable $sgpr62_sgpr63, implicit-def dead $scc - ; CHECK-NEXT: renamable $sgpr62_sgpr63 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = S_AND_B64 killed renamable $sgpr12_sgpr13, undef renamable $sgpr54_sgpr55, implicit-def dead $scc + ; CHECK-NEXT: renamable $sgpr54_sgpr55 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr12_sgpr13 ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5) - ; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $sgpr12_sgpr13, implicit $exec + ; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr34_sgpr35, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x80000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr64_sgpr65 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec ; CHECK-NEXT: renamable $sgpr66_sgpr67 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec @@ -179,14 +189,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.9(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr64_sgpr65, implicit-def dead $scc ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.10, implicit $vcc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.9: ; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.17(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY killed renamable $sgpr84_sgpr85, implicit $exec @@ -197,8 +207,8 @@ body: | ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: renamable $sgpr68_sgpr69 = COPY killed renamable $sgpr4_sgpr5 ; CHECK-NEXT: $sgpr4_sgpr5 = COPY renamable $sgpr68_sgpr69 - ; CHECK-NEXT: renamable $sgpr78_sgpr79 = COPY killed renamable $sgpr6_sgpr7 - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr78_sgpr79 + ; CHECK-NEXT: renamable $sgpr70_sgpr71 = COPY killed renamable $sgpr6_sgpr7 + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr70_sgpr71 ; CHECK-NEXT: renamable $sgpr80_sgpr81 = COPY killed renamable $sgpr10_sgpr11 ; CHECK-NEXT: $sgpr10_sgpr11 = COPY renamable $sgpr80_sgpr81 ; CHECK-NEXT: $sgpr12 = COPY renamable $sgpr14 @@ -207,18 +217,18 @@ body: | ; CHECK-NEXT: renamable $sgpr33 = COPY killed renamable $sgpr16 ; CHECK-NEXT: renamable $sgpr83 = COPY killed renamable $sgpr15 ; CHECK-NEXT: renamable $sgpr85 = COPY killed renamable $sgpr14 - ; CHECK-NEXT: renamable $sgpr36_sgpr37 = COPY killed renamable $sgpr18_sgpr19 + ; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY killed renamable $sgpr18_sgpr19 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr82_sgpr83 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9 - ; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr36_sgpr37 + ; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr48_sgpr49 ; CHECK-NEXT: renamable $sgpr14 = COPY killed renamable $sgpr85 ; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr83 ; CHECK-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr33 ; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY killed renamable $sgpr68_sgpr69 - ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr78_sgpr79 + ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr70_sgpr71 ; CHECK-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr84 ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr80_sgpr81 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -228,7 +238,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.10: ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.12(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.12 @@ -242,16 +252,16 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.12: ; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.13(0x40000000) - ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr62_sgpr63, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr54_sgpr55, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr62_sgpr63 + ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr54_sgpr55 ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.13: ; CHECK-NEXT: successors: %bb.15(0x40000000), %bb.14(0x40000000) ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.4, align 4, addrspace 5) ; CHECK-NEXT: $vcc = S_AND_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.15, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.14 @@ -264,7 +274,8 @@ body: | ; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.16(0x40000000) ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr100_sgpr101 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr100_sgpr101, implicit-def dead $scc + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5) + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, killed renamable $sgpr12_sgpr13, implicit-def dead $scc ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.11, implicit $vcc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.16: diff --git a/llvm/test/CodeGen/AMDGPU/select.f16.ll b/llvm/test/CodeGen/AMDGPU/select.f16.ll index 4a65b0ec50484..e920fdee51815 100644 --- a/llvm/test/CodeGen/AMDGPU/select.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/select.f16.ll @@ -1905,12 +1905,12 @@ define <16 x half> @v_vselect_v16f16(<16 x half> %a, <16 x half> %b, <16 x i32> ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 ; VI-NEXT: v_cmp_eq_u32_e64 s[18:19], 0, v17 -; VI-NEXT: v_cmp_eq_u32_e64 s[38:39], 0, v29 +; VI-NEXT: v_cmp_eq_u32_e64 s[40:41], 0, v29 ; VI-NEXT: v_lshrrev_b32_e32 v16, 16, v6 ; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v14 ; VI-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v18 ; VI-NEXT: v_cmp_eq_u32_e64 s[28:29], 0, v27 -; VI-NEXT: v_cndmask_b32_e64 v16, v17, v16, s[38:39] +; VI-NEXT: v_cndmask_b32_e64 v16, v17, v16, s[40:41] ; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v5 ; VI-NEXT: v_lshrrev_b32_e32 v18, 16, v13 ; VI-NEXT: v_cmp_eq_u32_e64 s[20:21], 0, v19 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll index 634d077e41d37..47810346c50b7 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll @@ -9,15 +9,15 @@ declare void @foo() define amdgpu_kernel void @kernel() { ; GCN-LABEL: kernel: ; GCN: ; %bb.0: -; GCN-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; GCN-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; GCN-NEXT: s_mov_b32 s50, -1 +; GCN-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s38, -1 ; GCN-NEXT: ; implicit-def: $vgpr40 : SGPR spill to VGPR lane -; GCN-NEXT: s_mov_b32 s51, 0xe00000 +; GCN-NEXT: s_mov_b32 s39, 0xe00000 ; GCN-NEXT: v_writelane_b32 v40, s4, 0 -; GCN-NEXT: s_add_u32 s48, s48, s11 +; GCN-NEXT: s_add_u32 s36, s36, s11 ; GCN-NEXT: v_writelane_b32 v40, s5, 1 -; GCN-NEXT: s_addc_u32 s49, s49, 0 +; GCN-NEXT: s_addc_u32 s37, s37, 0 ; GCN-NEXT: s_mov_b64 s[4:5], s[0:1] ; GCN-NEXT: v_readlane_b32 s0, v40, 0 ; GCN-NEXT: s_mov_b32 s13, s9 @@ -34,9 +34,9 @@ define amdgpu_kernel void @kernel() { ; GCN-NEXT: s_mov_b64 s[6:7], s[2:3] ; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1 -; GCN-NEXT: s_mov_b64 s[0:1], s[48:49] +; GCN-NEXT: s_mov_b64 s[0:1], s[36:37] ; GCN-NEXT: v_or3_b32 v31, v0, v1, v2 -; GCN-NEXT: s_mov_b64 s[2:3], s[50:51] +; GCN-NEXT: s_mov_b64 s[2:3], s[38:39] ; GCN-NEXT: s_mov_b32 s32, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll index a93994b5d6e5d..7ee7c83e0122d 100644 --- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll @@ -15006,8 +15006,8 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -15029,8 +15029,8 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -15047,8 +15047,8 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -15070,8 +15070,8 @@ define void @s_shuffle_v2i64_v8i64__15_6() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -15124,8 +15124,8 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -15147,8 +15147,8 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -15165,8 +15165,8 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -15188,8 +15188,8 @@ define void @s_shuffle_v2i64_v8i64__15_7() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -16056,8 +16056,8 @@ define void @s_shuffle_v2i64_v8i64__10_0() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -16077,8 +16077,8 @@ define void @s_shuffle_v2i64_v8i64__10_0() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -16095,8 +16095,8 @@ define void @s_shuffle_v2i64_v8i64__10_0() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -16116,8 +16116,8 @@ define void @s_shuffle_v2i64_v8i64__10_0() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -16785,8 +16785,8 @@ define void @s_shuffle_v2i64_v8i64__10_1() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -16806,8 +16806,8 @@ define void @s_shuffle_v2i64_v8i64__10_1() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -16824,8 +16824,8 @@ define void @s_shuffle_v2i64_v8i64__10_1() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -16845,8 +16845,8 @@ define void @s_shuffle_v2i64_v8i64__10_1() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -20853,8 +20853,8 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -20876,8 +20876,8 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -20894,8 +20894,8 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -20917,8 +20917,8 @@ define void @s_shuffle_v2i64_v8i64__9_6() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21027,8 +21027,8 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -21050,8 +21050,8 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21068,8 +21068,8 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -21091,8 +21091,8 @@ define void @s_shuffle_v2i64_v8i64__11_6() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21145,8 +21145,8 @@ define void @s_shuffle_v2i64_v8i64__12_6() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -21168,8 +21168,8 @@ define void @s_shuffle_v2i64_v8i64__12_6() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21186,8 +21186,8 @@ define void @s_shuffle_v2i64_v8i64__12_6() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -21209,8 +21209,8 @@ define void @s_shuffle_v2i64_v8i64__12_6() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21251,8 +21251,8 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -21274,8 +21274,8 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21292,8 +21292,8 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -21315,8 +21315,8 @@ define void @s_shuffle_v2i64_v8i64__13_6() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21369,8 +21369,8 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -21392,8 +21392,8 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21410,8 +21410,8 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -21433,8 +21433,8 @@ define void @s_shuffle_v2i64_v8i64__14_6() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21913,8 +21913,8 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -21936,8 +21936,8 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -21954,8 +21954,8 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -21977,8 +21977,8 @@ define void @s_shuffle_v2i64_v8i64__9_7() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22087,8 +22087,8 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -22110,8 +22110,8 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22128,8 +22128,8 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -22151,8 +22151,8 @@ define void @s_shuffle_v2i64_v8i64__11_7() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22205,8 +22205,8 @@ define void @s_shuffle_v2i64_v8i64__12_7() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -22228,8 +22228,8 @@ define void @s_shuffle_v2i64_v8i64__12_7() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22246,8 +22246,8 @@ define void @s_shuffle_v2i64_v8i64__12_7() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -22269,8 +22269,8 @@ define void @s_shuffle_v2i64_v8i64__12_7() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22311,8 +22311,8 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -22334,8 +22334,8 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22352,8 +22352,8 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -22375,8 +22375,8 @@ define void @s_shuffle_v2i64_v8i64__13_7() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22429,8 +22429,8 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -22452,8 +22452,8 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -22470,8 +22470,8 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -22493,8 +22493,8 @@ define void @s_shuffle_v2i64_v8i64__14_7() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -23411,8 +23411,8 @@ define void @s_shuffle_v2i64_v8i64__3_9() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -23432,8 +23432,8 @@ define void @s_shuffle_v2i64_v8i64__3_9() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -23450,8 +23450,8 @@ define void @s_shuffle_v2i64_v8i64__3_9() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -23471,8 +23471,8 @@ define void @s_shuffle_v2i64_v8i64__3_9() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -23684,8 +23684,8 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -23707,8 +23707,8 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -23725,8 +23725,8 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -23748,8 +23748,8 @@ define void @s_shuffle_v2i64_v8i64__6_9() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -24528,8 +24528,8 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -24551,8 +24551,8 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -24569,8 +24569,8 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -24592,8 +24592,8 @@ define void @s_shuffle_v2i64_v8i64__6_10() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -24646,8 +24646,8 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -24669,8 +24669,8 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -24687,8 +24687,8 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -24710,8 +24710,8 @@ define void @s_shuffle_v2i64_v8i64__7_10() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -25159,8 +25159,8 @@ define void @s_shuffle_v2i64_v8i64__1_11() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -25180,8 +25180,8 @@ define void @s_shuffle_v2i64_v8i64__1_11() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -25198,8 +25198,8 @@ define void @s_shuffle_v2i64_v8i64__1_11() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -25219,8 +25219,8 @@ define void @s_shuffle_v2i64_v8i64__1_11() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -25544,8 +25544,8 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -25567,8 +25567,8 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -25585,8 +25585,8 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -25608,8 +25608,8 @@ define void @s_shuffle_v2i64_v8i64__6_11() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -26388,8 +26388,8 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -26411,8 +26411,8 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -26429,8 +26429,8 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -26452,8 +26452,8 @@ define void @s_shuffle_v2i64_v8i64__6_12() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -26506,8 +26506,8 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -26529,8 +26529,8 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -26547,8 +26547,8 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -26570,8 +26570,8 @@ define void @s_shuffle_v2i64_v8i64__7_12() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -27283,8 +27283,8 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -27306,8 +27306,8 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -27324,8 +27324,8 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -27347,8 +27347,8 @@ define void @s_shuffle_v2i64_v8i64__6_13() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -27401,8 +27401,8 @@ define void @s_shuffle_v2i64_v8i64__7_13() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -27424,8 +27424,8 @@ define void @s_shuffle_v2i64_v8i64__7_13() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -27442,8 +27442,8 @@ define void @s_shuffle_v2i64_v8i64__7_13() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -27465,8 +27465,8 @@ define void @s_shuffle_v2i64_v8i64__7_13() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -28241,8 +28241,8 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -28264,8 +28264,8 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -28282,8 +28282,8 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -28305,8 +28305,8 @@ define void @s_shuffle_v2i64_v8i64__6_14() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -28359,8 +28359,8 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -28382,8 +28382,8 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -28400,8 +28400,8 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -28423,8 +28423,8 @@ define void @s_shuffle_v2i64_v8i64__7_14() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -29223,8 +29223,8 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -29246,8 +29246,8 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -29264,8 +29264,8 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -29287,8 +29287,8 @@ define void @s_shuffle_v2i64_v8i64__6_15() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -29341,8 +29341,8 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_writelane_b32 v0, s36, 0 ; GFX900-NEXT: v_writelane_b32 v0, s37, 1 -; GFX900-NEXT: v_writelane_b32 v0, s46, 2 -; GFX900-NEXT: v_writelane_b32 v0, s47, 3 +; GFX900-NEXT: v_writelane_b32 v0, s38, 2 +; GFX900-NEXT: v_writelane_b32 v0, s39, 3 ; GFX900-NEXT: v_writelane_b32 v0, s48, 4 ; GFX900-NEXT: v_writelane_b32 v0, s49, 5 ; GFX900-NEXT: v_writelane_b32 v0, s50, 6 @@ -29364,8 +29364,8 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX900-NEXT: v_readlane_b32 s50, v0, 6 ; GFX900-NEXT: v_readlane_b32 s49, v0, 5 ; GFX900-NEXT: v_readlane_b32 s48, v0, 4 -; GFX900-NEXT: v_readlane_b32 s47, v0, 3 -; GFX900-NEXT: v_readlane_b32 s46, v0, 2 +; GFX900-NEXT: v_readlane_b32 s39, v0, 3 +; GFX900-NEXT: v_readlane_b32 s38, v0, 2 ; GFX900-NEXT: v_readlane_b32 s37, v0, 1 ; GFX900-NEXT: v_readlane_b32 s36, v0, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 @@ -29382,8 +29382,8 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX90A-NEXT: s_mov_b64 exec, s[4:5] ; GFX90A-NEXT: v_writelane_b32 v0, s36, 0 ; GFX90A-NEXT: v_writelane_b32 v0, s37, 1 -; GFX90A-NEXT: v_writelane_b32 v0, s46, 2 -; GFX90A-NEXT: v_writelane_b32 v0, s47, 3 +; GFX90A-NEXT: v_writelane_b32 v0, s38, 2 +; GFX90A-NEXT: v_writelane_b32 v0, s39, 3 ; GFX90A-NEXT: v_writelane_b32 v0, s48, 4 ; GFX90A-NEXT: v_writelane_b32 v0, s49, 5 ; GFX90A-NEXT: v_writelane_b32 v0, s50, 6 @@ -29405,8 +29405,8 @@ define void @s_shuffle_v2i64_v8i64__7_15() { ; GFX90A-NEXT: v_readlane_b32 s50, v0, 6 ; GFX90A-NEXT: v_readlane_b32 s49, v0, 5 ; GFX90A-NEXT: v_readlane_b32 s48, v0, 4 -; GFX90A-NEXT: v_readlane_b32 s47, v0, 3 -; GFX90A-NEXT: v_readlane_b32 s46, v0, 2 +; GFX90A-NEXT: v_readlane_b32 s39, v0, 3 +; GFX90A-NEXT: v_readlane_b32 s38, v0, 2 ; GFX90A-NEXT: v_readlane_b32 s37, v0, 1 ; GFX90A-NEXT: v_readlane_b32 s36, v0, 0 ; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1 diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index 3447cd161c653..0221bb0cf4f35 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -610,16 +610,16 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; FIJI-NEXT: v_writelane_b32 v40, s35, 3 ; FIJI-NEXT: v_writelane_b32 v40, s36, 4 ; FIJI-NEXT: v_writelane_b32 v40, s37, 5 -; FIJI-NEXT: v_writelane_b32 v40, s46, 6 -; FIJI-NEXT: v_writelane_b32 v40, s47, 7 +; FIJI-NEXT: v_writelane_b32 v40, s38, 6 +; FIJI-NEXT: v_writelane_b32 v40, s39, 7 ; FIJI-NEXT: v_writelane_b32 v40, s48, 8 ; FIJI-NEXT: v_writelane_b32 v40, s49, 9 ; FIJI-NEXT: v_writelane_b32 v40, s50, 10 ; FIJI-NEXT: v_writelane_b32 v40, s51, 11 ; FIJI-NEXT: v_writelane_b32 v40, s52, 12 ; FIJI-NEXT: v_writelane_b32 v40, s53, 13 -; FIJI-NEXT: v_writelane_b32 v40, s62, 14 -; FIJI-NEXT: v_writelane_b32 v40, s63, 15 +; FIJI-NEXT: v_writelane_b32 v40, s54, 14 +; FIJI-NEXT: v_writelane_b32 v40, s55, 15 ; FIJI-NEXT: v_writelane_b32 v40, s64, 16 ; FIJI-NEXT: s_mov_b32 s50, s15 ; FIJI-NEXT: s_mov_b32 s51, s14 @@ -627,10 +627,10 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; FIJI-NEXT: s_mov_b32 s53, s12 ; FIJI-NEXT: s_mov_b64 s[34:35], s[10:11] ; FIJI-NEXT: s_mov_b64 s[36:37], s[8:9] -; FIJI-NEXT: s_mov_b64 s[46:47], s[6:7] +; FIJI-NEXT: s_mov_b64 s[38:39], s[6:7] ; FIJI-NEXT: s_mov_b64 s[48:49], s[4:5] ; FIJI-NEXT: v_add_u32_e32 v3, vcc, v3, v4 -; FIJI-NEXT: s_mov_b64 s[62:63], exec +; FIJI-NEXT: s_mov_b64 s[54:55], exec ; FIJI-NEXT: s_addk_i32 s32, 0x400 ; FIJI-NEXT: v_writelane_b32 v40, s65, 17 ; FIJI-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 @@ -639,7 +639,7 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; FIJI-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] ; FIJI-NEXT: s_and_saveexec_b64 s[64:65], vcc ; FIJI-NEXT: s_mov_b64 s[4:5], s[48:49] -; FIJI-NEXT: s_mov_b64 s[6:7], s[46:47] +; FIJI-NEXT: s_mov_b64 s[6:7], s[38:39] ; FIJI-NEXT: s_mov_b64 s[8:9], s[36:37] ; FIJI-NEXT: s_mov_b64 s[10:11], s[34:35] ; FIJI-NEXT: s_mov_b32 s12, s53 @@ -657,20 +657,20 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; FIJI-NEXT: s_xor_b64 exec, exec, s[64:65] ; FIJI-NEXT: s_cbranch_execnz .LBB18_1 ; FIJI-NEXT: ; %bb.2: -; FIJI-NEXT: s_mov_b64 exec, s[62:63] +; FIJI-NEXT: s_mov_b64 exec, s[54:55] ; FIJI-NEXT: v_mov_b32_e32 v0, v4 ; FIJI-NEXT: v_readlane_b32 s65, v40, 17 ; FIJI-NEXT: v_readlane_b32 s64, v40, 16 -; FIJI-NEXT: v_readlane_b32 s63, v40, 15 -; FIJI-NEXT: v_readlane_b32 s62, v40, 14 +; FIJI-NEXT: v_readlane_b32 s55, v40, 15 +; FIJI-NEXT: v_readlane_b32 s54, v40, 14 ; FIJI-NEXT: v_readlane_b32 s53, v40, 13 ; FIJI-NEXT: v_readlane_b32 s52, v40, 12 ; FIJI-NEXT: v_readlane_b32 s51, v40, 11 ; FIJI-NEXT: v_readlane_b32 s50, v40, 10 ; FIJI-NEXT: v_readlane_b32 s49, v40, 9 ; FIJI-NEXT: v_readlane_b32 s48, v40, 8 -; FIJI-NEXT: v_readlane_b32 s47, v40, 7 -; FIJI-NEXT: v_readlane_b32 s46, v40, 6 +; FIJI-NEXT: v_readlane_b32 s39, v40, 7 +; FIJI-NEXT: v_readlane_b32 s38, v40, 6 ; FIJI-NEXT: v_readlane_b32 s37, v40, 5 ; FIJI-NEXT: v_readlane_b32 s36, v40, 4 ; FIJI-NEXT: v_readlane_b32 s35, v40, 3 @@ -701,16 +701,16 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; HAWAII-NEXT: v_writelane_b32 v40, s35, 3 ; HAWAII-NEXT: v_writelane_b32 v40, s36, 4 ; HAWAII-NEXT: v_writelane_b32 v40, s37, 5 -; HAWAII-NEXT: v_writelane_b32 v40, s46, 6 -; HAWAII-NEXT: v_writelane_b32 v40, s47, 7 +; HAWAII-NEXT: v_writelane_b32 v40, s38, 6 +; HAWAII-NEXT: v_writelane_b32 v40, s39, 7 ; HAWAII-NEXT: v_writelane_b32 v40, s48, 8 ; HAWAII-NEXT: v_writelane_b32 v40, s49, 9 ; HAWAII-NEXT: v_writelane_b32 v40, s50, 10 ; HAWAII-NEXT: v_writelane_b32 v40, s51, 11 ; HAWAII-NEXT: v_writelane_b32 v40, s52, 12 ; HAWAII-NEXT: v_writelane_b32 v40, s53, 13 -; HAWAII-NEXT: v_writelane_b32 v40, s62, 14 -; HAWAII-NEXT: v_writelane_b32 v40, s63, 15 +; HAWAII-NEXT: v_writelane_b32 v40, s54, 14 +; HAWAII-NEXT: v_writelane_b32 v40, s55, 15 ; HAWAII-NEXT: v_writelane_b32 v40, s64, 16 ; HAWAII-NEXT: s_mov_b32 s50, s15 ; HAWAII-NEXT: s_mov_b32 s51, s14 @@ -718,10 +718,10 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; HAWAII-NEXT: s_mov_b32 s53, s12 ; HAWAII-NEXT: s_mov_b64 s[34:35], s[10:11] ; HAWAII-NEXT: s_mov_b64 s[36:37], s[8:9] -; HAWAII-NEXT: s_mov_b64 s[46:47], s[6:7] +; HAWAII-NEXT: s_mov_b64 s[38:39], s[6:7] ; HAWAII-NEXT: s_mov_b64 s[48:49], s[4:5] ; HAWAII-NEXT: v_add_i32_e32 v3, vcc, v3, v4 -; HAWAII-NEXT: s_mov_b64 s[62:63], exec +; HAWAII-NEXT: s_mov_b64 s[54:55], exec ; HAWAII-NEXT: s_addk_i32 s32, 0x400 ; HAWAII-NEXT: v_writelane_b32 v40, s65, 17 ; HAWAII-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 @@ -730,7 +730,7 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; HAWAII-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] ; HAWAII-NEXT: s_and_saveexec_b64 s[64:65], vcc ; HAWAII-NEXT: s_mov_b64 s[4:5], s[48:49] -; HAWAII-NEXT: s_mov_b64 s[6:7], s[46:47] +; HAWAII-NEXT: s_mov_b64 s[6:7], s[38:39] ; HAWAII-NEXT: s_mov_b64 s[8:9], s[36:37] ; HAWAII-NEXT: s_mov_b64 s[10:11], s[34:35] ; HAWAII-NEXT: s_mov_b32 s12, s53 @@ -748,20 +748,20 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; HAWAII-NEXT: s_xor_b64 exec, exec, s[64:65] ; HAWAII-NEXT: s_cbranch_execnz .LBB18_1 ; HAWAII-NEXT: ; %bb.2: -; HAWAII-NEXT: s_mov_b64 exec, s[62:63] +; HAWAII-NEXT: s_mov_b64 exec, s[54:55] ; HAWAII-NEXT: v_mov_b32_e32 v0, v4 ; HAWAII-NEXT: v_readlane_b32 s65, v40, 17 ; HAWAII-NEXT: v_readlane_b32 s64, v40, 16 -; HAWAII-NEXT: v_readlane_b32 s63, v40, 15 -; HAWAII-NEXT: v_readlane_b32 s62, v40, 14 +; HAWAII-NEXT: v_readlane_b32 s55, v40, 15 +; HAWAII-NEXT: v_readlane_b32 s54, v40, 14 ; HAWAII-NEXT: v_readlane_b32 s53, v40, 13 ; HAWAII-NEXT: v_readlane_b32 s52, v40, 12 ; HAWAII-NEXT: v_readlane_b32 s51, v40, 11 ; HAWAII-NEXT: v_readlane_b32 s50, v40, 10 ; HAWAII-NEXT: v_readlane_b32 s49, v40, 9 ; HAWAII-NEXT: v_readlane_b32 s48, v40, 8 -; HAWAII-NEXT: v_readlane_b32 s47, v40, 7 -; HAWAII-NEXT: v_readlane_b32 s46, v40, 6 +; HAWAII-NEXT: v_readlane_b32 s39, v40, 7 +; HAWAII-NEXT: v_readlane_b32 s38, v40, 6 ; HAWAII-NEXT: v_readlane_b32 s37, v40, 5 ; HAWAII-NEXT: v_readlane_b32 s36, v40, 4 ; HAWAII-NEXT: v_readlane_b32 s35, v40, 3 @@ -792,16 +792,16 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; GFX9-NEXT: v_writelane_b32 v40, s35, 3 ; GFX9-NEXT: v_writelane_b32 v40, s36, 4 ; GFX9-NEXT: v_writelane_b32 v40, s37, 5 -; GFX9-NEXT: v_writelane_b32 v40, s46, 6 -; GFX9-NEXT: v_writelane_b32 v40, s47, 7 +; GFX9-NEXT: v_writelane_b32 v40, s38, 6 +; GFX9-NEXT: v_writelane_b32 v40, s39, 7 ; GFX9-NEXT: v_writelane_b32 v40, s48, 8 ; GFX9-NEXT: v_writelane_b32 v40, s49, 9 ; GFX9-NEXT: v_writelane_b32 v40, s50, 10 ; GFX9-NEXT: v_writelane_b32 v40, s51, 11 ; GFX9-NEXT: v_writelane_b32 v40, s52, 12 ; GFX9-NEXT: v_writelane_b32 v40, s53, 13 -; GFX9-NEXT: v_writelane_b32 v40, s62, 14 -; GFX9-NEXT: v_writelane_b32 v40, s63, 15 +; GFX9-NEXT: v_writelane_b32 v40, s54, 14 +; GFX9-NEXT: v_writelane_b32 v40, s55, 15 ; GFX9-NEXT: v_writelane_b32 v40, s64, 16 ; GFX9-NEXT: s_mov_b32 s50, s15 ; GFX9-NEXT: s_mov_b32 s51, s14 @@ -809,10 +809,10 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; GFX9-NEXT: s_mov_b32 s53, s12 ; GFX9-NEXT: s_mov_b64 s[34:35], s[10:11] ; GFX9-NEXT: s_mov_b64 s[36:37], s[8:9] -; GFX9-NEXT: s_mov_b64 s[46:47], s[6:7] +; GFX9-NEXT: s_mov_b64 s[38:39], s[6:7] ; GFX9-NEXT: s_mov_b64 s[48:49], s[4:5] ; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 -; GFX9-NEXT: s_mov_b64 s[62:63], exec +; GFX9-NEXT: s_mov_b64 s[54:55], exec ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s65, 17 ; GFX9-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 @@ -821,7 +821,7 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1] ; GFX9-NEXT: s_and_saveexec_b64 s[64:65], vcc ; GFX9-NEXT: s_mov_b64 s[4:5], s[48:49] -; GFX9-NEXT: s_mov_b64 s[6:7], s[46:47] +; GFX9-NEXT: s_mov_b64 s[6:7], s[38:39] ; GFX9-NEXT: s_mov_b64 s[8:9], s[36:37] ; GFX9-NEXT: s_mov_b64 s[10:11], s[34:35] ; GFX9-NEXT: s_mov_b32 s12, s53 @@ -839,20 +839,20 @@ define hidden fastcc i32 @indirect_divergent_sibling_call_i32_fastcc_i32_i32(ptr ; GFX9-NEXT: s_xor_b64 exec, exec, s[64:65] ; GFX9-NEXT: s_cbranch_execnz .LBB18_1 ; GFX9-NEXT: ; %bb.2: -; GFX9-NEXT: s_mov_b64 exec, s[62:63] +; GFX9-NEXT: s_mov_b64 exec, s[54:55] ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_readlane_b32 s65, v40, 17 ; GFX9-NEXT: v_readlane_b32 s64, v40, 16 -; GFX9-NEXT: v_readlane_b32 s63, v40, 15 -; GFX9-NEXT: v_readlane_b32 s62, v40, 14 +; GFX9-NEXT: v_readlane_b32 s55, v40, 15 +; GFX9-NEXT: v_readlane_b32 s54, v40, 14 ; GFX9-NEXT: v_readlane_b32 s53, v40, 13 ; GFX9-NEXT: v_readlane_b32 s52, v40, 12 ; GFX9-NEXT: v_readlane_b32 s51, v40, 11 ; GFX9-NEXT: v_readlane_b32 s50, v40, 10 ; GFX9-NEXT: v_readlane_b32 s49, v40, 9 ; GFX9-NEXT: v_readlane_b32 s48, v40, 8 -; GFX9-NEXT: v_readlane_b32 s47, v40, 7 -; GFX9-NEXT: v_readlane_b32 s46, v40, 6 +; GFX9-NEXT: v_readlane_b32 s39, v40, 7 +; GFX9-NEXT: v_readlane_b32 s38, v40, 6 ; GFX9-NEXT: v_readlane_b32 s37, v40, 5 ; GFX9-NEXT: v_readlane_b32 s36, v40, 4 ; GFX9-NEXT: v_readlane_b32 s35, v40, 3 diff --git a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir index 6b5c624356f47..cf23a9d1e8a57 100644 --- a/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir +++ b/llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir @@ -34,78 +34,56 @@ body: | ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr34_sgpr35 = IMPLICIT_DEF ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: renamable $sgpr49 = IMPLICIT_DEF - ; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY undef $sgpr8_sgpr9 + ; CHECK-NEXT: renamable $sgpr35 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY undef $sgpr8_sgpr9 ; CHECK-NEXT: renamable $sgpr36_sgpr37 = IMPLICIT_DEF - ; CHECK-NEXT: renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX8_IMM renamable $sgpr46_sgpr47, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4) - ; CHECK-NEXT: dead renamable $sgpr4 = S_LOAD_DWORD_IMM renamable $sgpr46_sgpr47, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4) - ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 0, 0 :: (invariant load (s64), align 16, addrspace 4) + ; CHECK-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 0, 0 :: (dereferenceable invariant load (s256), align 16, addrspace 4) + ; CHECK-NEXT: dead renamable $sgpr4 = S_LOAD_DWORD_IMM renamable $sgpr38_sgpr39, 48, 0 :: (dereferenceable invariant load (s32), align 16, addrspace 4) + ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr48_sgpr49, 0, 0 :: (invariant load (s64), align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY renamable $sgpr14_sgpr15 - ; CHECK-NEXT: renamable $sgpr9 = COPY renamable $sgpr13 - ; CHECK-NEXT: renamable $vgpr23 = IMPLICIT_DEF - ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr23, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, killed $vgpr23 - ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr6, 2, killed $vgpr23 - ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr7, 3, killed $vgpr23 - ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr8, 4, killed $vgpr23 - ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr9, 5, killed $vgpr23 - ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr10, 6, killed $vgpr23 - ; CHECK-NEXT: $vgpr23 = SI_SPILL_S32_TO_VGPR $sgpr11, 7, killed $vgpr23, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr23, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; CHECK-NEXT: $vgpr1 = COPY killed renamable $sgpr15 + ; CHECK-NEXT: $vgpr1 = COPY renamable $sgpr55 ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit undef $sgpr15, implicit $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: $vcc = COPY renamable $sgpr48_sgpr49 + ; CHECK-NEXT: $vcc = COPY renamable $sgpr34_sgpr35 ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.2, implicit undef $vcc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47 + ; CHECK-NEXT: liveins: $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55:0x000000000000FC00 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr46_sgpr47, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) + ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47 + ; CHECK-NEXT: liveins: $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55:0x000000000000FC00 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr46_sgpr47, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) + ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 = S_LOAD_DWORDX8_IMM renamable $sgpr38_sgpr39, 56, 0 :: (dereferenceable invariant load (s256), align 8, addrspace 4) ; CHECK-NEXT: S_CMP_LG_U64 renamable $sgpr4_sgpr5, 0, implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) - ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0 + ; CHECK-NEXT: liveins: $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55:0x000000000000FC00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.5, implicit undef $vcc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) - ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0 + ; CHECK-NEXT: liveins: $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000003F0, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55:0x000000000000FC00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CMP_EQ_U32 renamable $sgpr8, 0, implicit-def $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: liveins: $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0 + ; CHECK-NEXT: liveins: $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11:0x00000000000000F0, $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55:0x000000000000FC00 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr46_sgpr47, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4) + ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr38_sgpr39, 40, 0 :: (dereferenceable invariant load (s64), addrspace 4) ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], killed renamable $sgpr6_sgpr7, 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; CHECK-NEXT: renamable $vgpr23 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11 - ; CHECK-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 1 - ; CHECK-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 2 - ; CHECK-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 3 - ; CHECK-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 4 - ; CHECK-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 5 - ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr23, 6 - ; CHECK-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR killed $vgpr23, 7 - ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], renamable $sgpr10_sgpr11, 0, 0, implicit $exec :: (store (s32), addrspace 1) - ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr9 + ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR undef [[DEF]], undef [[DEF]], renamable $sgpr54_sgpr55, 0, 0, implicit $exec :: (store (s32), addrspace 1) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr53 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: $sgpr6_sgpr7 = COPY killed renamable $sgpr36_sgpr37 - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr34_sgpr35 + ; CHECK-NEXT: renamable $sgpr10_sgpr11 = IMPLICIT_DEF ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: S_ENDPGM 0 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir index adaef348a0388..7f4f9489ea4b7 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir @@ -53,7 +53,7 @@ body: | bb.0: liveins: $sgpr30_sgpr31, $sgpr10, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN-LABEL: name: sgpr_spill_lane_crossover - ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr94, $sgpr95, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr64, 0, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr65, 1, $vgpr63 @@ -61,16 +61,16 @@ body: | ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr67, 3, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr68, 4, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr69, 5, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr78, 6, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr79, 7, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr70, 6, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr71, 7, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr80, 8, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr81, 9, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr82, 10, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr83, 11, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr84, 12, $vgpr63 ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr85, 13, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr94, 14, $vgpr63 - ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 15, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr86, 14, $vgpr63 + ; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr87, 15, $vgpr63 ; GCN-NEXT: S_NOP 0 ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]] diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir index fcd835c7f09da..6e8a5126ca823 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir @@ -60,11 +60,11 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, killed $vgpr0 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, killed $vgpr0 - ; GCN-NEXT: $sgpr38_sgpr39 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $sgpr40_sgpr41 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: $sgpr28_sgpr29 = IMPLICIT_DEF ; GCN-NEXT: $vgpr1 = COPY $vgpr0 ; GCN-NEXT: S_NOP 0, implicit $sgpr28_sgpr29 - ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr38_sgpr39 + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr40_sgpr41 ; GCN-NEXT: $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 0 ; GCN-NEXT: $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 1 ; GCN-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8_sgpr9_sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr15, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $vcc diff --git a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll index f7ea8109beea4..fba85455ef693 100644 --- a/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll @@ -11,32 +11,32 @@ define void @spill_more_than_wavesize_csr_sgprs() { ; CHECK-NEXT: v_writelane_b32 v0, s35, 0 ; CHECK-NEXT: v_writelane_b32 v0, s36, 1 ; CHECK-NEXT: v_writelane_b32 v0, s37, 2 -; CHECK-NEXT: v_writelane_b32 v0, s46, 3 -; CHECK-NEXT: v_writelane_b32 v0, s47, 4 +; CHECK-NEXT: v_writelane_b32 v0, s38, 3 +; CHECK-NEXT: v_writelane_b32 v0, s39, 4 ; CHECK-NEXT: v_writelane_b32 v0, s48, 5 ; CHECK-NEXT: v_writelane_b32 v0, s49, 6 ; CHECK-NEXT: v_writelane_b32 v0, s50, 7 ; CHECK-NEXT: v_writelane_b32 v0, s51, 8 ; CHECK-NEXT: v_writelane_b32 v0, s52, 9 ; CHECK-NEXT: v_writelane_b32 v0, s53, 10 -; CHECK-NEXT: v_writelane_b32 v0, s62, 11 -; CHECK-NEXT: v_writelane_b32 v0, s63, 12 +; CHECK-NEXT: v_writelane_b32 v0, s54, 11 +; CHECK-NEXT: v_writelane_b32 v0, s55, 12 ; CHECK-NEXT: v_writelane_b32 v0, s64, 13 ; CHECK-NEXT: v_writelane_b32 v0, s65, 14 ; CHECK-NEXT: v_writelane_b32 v0, s66, 15 ; CHECK-NEXT: v_writelane_b32 v0, s67, 16 ; CHECK-NEXT: v_writelane_b32 v0, s68, 17 ; CHECK-NEXT: v_writelane_b32 v0, s69, 18 -; CHECK-NEXT: v_writelane_b32 v0, s78, 19 -; CHECK-NEXT: v_writelane_b32 v0, s79, 20 +; CHECK-NEXT: v_writelane_b32 v0, s70, 19 +; CHECK-NEXT: v_writelane_b32 v0, s71, 20 ; CHECK-NEXT: v_writelane_b32 v0, s80, 21 ; CHECK-NEXT: v_writelane_b32 v0, s81, 22 ; CHECK-NEXT: v_writelane_b32 v0, s82, 23 ; CHECK-NEXT: v_writelane_b32 v0, s83, 24 ; CHECK-NEXT: v_writelane_b32 v0, s84, 25 ; CHECK-NEXT: v_writelane_b32 v0, s85, 26 -; CHECK-NEXT: v_writelane_b32 v0, s94, 27 -; CHECK-NEXT: v_writelane_b32 v0, s95, 28 +; CHECK-NEXT: v_writelane_b32 v0, s86, 27 +; CHECK-NEXT: v_writelane_b32 v0, s87, 28 ; CHECK-NEXT: v_writelane_b32 v0, s96, 29 ; CHECK-NEXT: v_writelane_b32 v0, s97, 30 ; CHECK-NEXT: v_writelane_b32 v0, s98, 31 @@ -53,32 +53,32 @@ define void @spill_more_than_wavesize_csr_sgprs() { ; CHECK-NEXT: v_readlane_b32 s98, v0, 31 ; CHECK-NEXT: v_readlane_b32 s97, v0, 30 ; CHECK-NEXT: v_readlane_b32 s96, v0, 29 -; CHECK-NEXT: v_readlane_b32 s95, v0, 28 -; CHECK-NEXT: v_readlane_b32 s94, v0, 27 +; CHECK-NEXT: v_readlane_b32 s87, v0, 28 +; CHECK-NEXT: v_readlane_b32 s86, v0, 27 ; CHECK-NEXT: v_readlane_b32 s85, v0, 26 ; CHECK-NEXT: v_readlane_b32 s84, v0, 25 ; CHECK-NEXT: v_readlane_b32 s83, v0, 24 ; CHECK-NEXT: v_readlane_b32 s82, v0, 23 ; CHECK-NEXT: v_readlane_b32 s81, v0, 22 ; CHECK-NEXT: v_readlane_b32 s80, v0, 21 -; CHECK-NEXT: v_readlane_b32 s79, v0, 20 -; CHECK-NEXT: v_readlane_b32 s78, v0, 19 +; CHECK-NEXT: v_readlane_b32 s71, v0, 20 +; CHECK-NEXT: v_readlane_b32 s70, v0, 19 ; CHECK-NEXT: v_readlane_b32 s69, v0, 18 ; CHECK-NEXT: v_readlane_b32 s68, v0, 17 ; CHECK-NEXT: v_readlane_b32 s67, v0, 16 ; CHECK-NEXT: v_readlane_b32 s66, v0, 15 ; CHECK-NEXT: v_readlane_b32 s65, v0, 14 ; CHECK-NEXT: v_readlane_b32 s64, v0, 13 -; CHECK-NEXT: v_readlane_b32 s63, v0, 12 -; CHECK-NEXT: v_readlane_b32 s62, v0, 11 +; CHECK-NEXT: v_readlane_b32 s55, v0, 12 +; CHECK-NEXT: v_readlane_b32 s54, v0, 11 ; CHECK-NEXT: v_readlane_b32 s53, v0, 10 ; CHECK-NEXT: v_readlane_b32 s52, v0, 9 ; CHECK-NEXT: v_readlane_b32 s51, v0, 8 ; CHECK-NEXT: v_readlane_b32 s50, v0, 7 ; CHECK-NEXT: v_readlane_b32 s49, v0, 6 ; CHECK-NEXT: v_readlane_b32 s48, v0, 5 -; CHECK-NEXT: v_readlane_b32 s47, v0, 4 -; CHECK-NEXT: v_readlane_b32 s46, v0, 3 +; CHECK-NEXT: v_readlane_b32 s39, v0, 4 +; CHECK-NEXT: v_readlane_b32 s38, v0, 3 ; CHECK-NEXT: v_readlane_b32 s37, v0, 2 ; CHECK-NEXT: v_readlane_b32 s36, v0, 1 ; CHECK-NEXT: v_readlane_b32 s35, v0, 0 @@ -110,32 +110,32 @@ define void @spill_more_than_wavesize_csr_sgprs_with_stack_object() { ; CHECK-NEXT: v_writelane_b32 v1, s35, 0 ; CHECK-NEXT: v_writelane_b32 v1, s36, 1 ; CHECK-NEXT: v_writelane_b32 v1, s37, 2 -; CHECK-NEXT: v_writelane_b32 v1, s46, 3 -; CHECK-NEXT: v_writelane_b32 v1, s47, 4 +; CHECK-NEXT: v_writelane_b32 v1, s38, 3 +; CHECK-NEXT: v_writelane_b32 v1, s39, 4 ; CHECK-NEXT: v_writelane_b32 v1, s48, 5 ; CHECK-NEXT: v_writelane_b32 v1, s49, 6 ; CHECK-NEXT: v_writelane_b32 v1, s50, 7 ; CHECK-NEXT: v_writelane_b32 v1, s51, 8 ; CHECK-NEXT: v_writelane_b32 v1, s52, 9 ; CHECK-NEXT: v_writelane_b32 v1, s53, 10 -; CHECK-NEXT: v_writelane_b32 v1, s62, 11 -; CHECK-NEXT: v_writelane_b32 v1, s63, 12 +; CHECK-NEXT: v_writelane_b32 v1, s54, 11 +; CHECK-NEXT: v_writelane_b32 v1, s55, 12 ; CHECK-NEXT: v_writelane_b32 v1, s64, 13 ; CHECK-NEXT: v_writelane_b32 v1, s65, 14 ; CHECK-NEXT: v_writelane_b32 v1, s66, 15 ; CHECK-NEXT: v_writelane_b32 v1, s67, 16 ; CHECK-NEXT: v_writelane_b32 v1, s68, 17 ; CHECK-NEXT: v_writelane_b32 v1, s69, 18 -; CHECK-NEXT: v_writelane_b32 v1, s78, 19 -; CHECK-NEXT: v_writelane_b32 v1, s79, 20 +; CHECK-NEXT: v_writelane_b32 v1, s70, 19 +; CHECK-NEXT: v_writelane_b32 v1, s71, 20 ; CHECK-NEXT: v_writelane_b32 v1, s80, 21 ; CHECK-NEXT: v_writelane_b32 v1, s81, 22 ; CHECK-NEXT: v_writelane_b32 v1, s82, 23 ; CHECK-NEXT: v_writelane_b32 v1, s83, 24 ; CHECK-NEXT: v_writelane_b32 v1, s84, 25 ; CHECK-NEXT: v_writelane_b32 v1, s85, 26 -; CHECK-NEXT: v_writelane_b32 v1, s94, 27 -; CHECK-NEXT: v_writelane_b32 v1, s95, 28 +; CHECK-NEXT: v_writelane_b32 v1, s86, 27 +; CHECK-NEXT: v_writelane_b32 v1, s87, 28 ; CHECK-NEXT: v_writelane_b32 v1, s96, 29 ; CHECK-NEXT: v_writelane_b32 v1, s97, 30 ; CHECK-NEXT: v_writelane_b32 v1, s98, 31 @@ -155,32 +155,32 @@ define void @spill_more_than_wavesize_csr_sgprs_with_stack_object() { ; CHECK-NEXT: v_readlane_b32 s98, v1, 31 ; CHECK-NEXT: v_readlane_b32 s97, v1, 30 ; CHECK-NEXT: v_readlane_b32 s96, v1, 29 -; CHECK-NEXT: v_readlane_b32 s95, v1, 28 -; CHECK-NEXT: v_readlane_b32 s94, v1, 27 +; CHECK-NEXT: v_readlane_b32 s87, v1, 28 +; CHECK-NEXT: v_readlane_b32 s86, v1, 27 ; CHECK-NEXT: v_readlane_b32 s85, v1, 26 ; CHECK-NEXT: v_readlane_b32 s84, v1, 25 ; CHECK-NEXT: v_readlane_b32 s83, v1, 24 ; CHECK-NEXT: v_readlane_b32 s82, v1, 23 ; CHECK-NEXT: v_readlane_b32 s81, v1, 22 ; CHECK-NEXT: v_readlane_b32 s80, v1, 21 -; CHECK-NEXT: v_readlane_b32 s79, v1, 20 -; CHECK-NEXT: v_readlane_b32 s78, v1, 19 +; CHECK-NEXT: v_readlane_b32 s71, v1, 20 +; CHECK-NEXT: v_readlane_b32 s70, v1, 19 ; CHECK-NEXT: v_readlane_b32 s69, v1, 18 ; CHECK-NEXT: v_readlane_b32 s68, v1, 17 ; CHECK-NEXT: v_readlane_b32 s67, v1, 16 ; CHECK-NEXT: v_readlane_b32 s66, v1, 15 ; CHECK-NEXT: v_readlane_b32 s65, v1, 14 ; CHECK-NEXT: v_readlane_b32 s64, v1, 13 -; CHECK-NEXT: v_readlane_b32 s63, v1, 12 -; CHECK-NEXT: v_readlane_b32 s62, v1, 11 +; CHECK-NEXT: v_readlane_b32 s55, v1, 12 +; CHECK-NEXT: v_readlane_b32 s54, v1, 11 ; CHECK-NEXT: v_readlane_b32 s53, v1, 10 ; CHECK-NEXT: v_readlane_b32 s52, v1, 9 ; CHECK-NEXT: v_readlane_b32 s51, v1, 8 ; CHECK-NEXT: v_readlane_b32 s50, v1, 7 ; CHECK-NEXT: v_readlane_b32 s49, v1, 6 ; CHECK-NEXT: v_readlane_b32 s48, v1, 5 -; CHECK-NEXT: v_readlane_b32 s47, v1, 4 -; CHECK-NEXT: v_readlane_b32 s46, v1, 3 +; CHECK-NEXT: v_readlane_b32 s39, v1, 4 +; CHECK-NEXT: v_readlane_b32 s38, v1, 3 ; CHECK-NEXT: v_readlane_b32 s37, v1, 2 ; CHECK-NEXT: v_readlane_b32 s36, v1, 1 ; CHECK-NEXT: v_readlane_b32 s35, v1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll index 89bb346ee98df..d4d3b37a0ed1e 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -11,12 +11,12 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr ; MUBUF-LABEL: kernel_background_evaluate: ; MUBUF: ; %bb.0: ; %entry ; MUBUF-NEXT: s_load_dword s0, s[4:5], 0x24 -; MUBUF-NEXT: s_mov_b32 s48, SCRATCH_RSRC_DWORD0 -; MUBUF-NEXT: s_mov_b32 s49, SCRATCH_RSRC_DWORD1 -; MUBUF-NEXT: s_mov_b32 s50, -1 -; MUBUF-NEXT: s_mov_b32 s51, 0x31c16000 -; MUBUF-NEXT: s_add_u32 s48, s48, s11 -; MUBUF-NEXT: s_addc_u32 s49, s49, 0 +; MUBUF-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; MUBUF-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; MUBUF-NEXT: s_mov_b32 s38, -1 +; MUBUF-NEXT: s_mov_b32 s39, 0x31c16000 +; MUBUF-NEXT: s_add_u32 s36, s36, s11 +; MUBUF-NEXT: s_addc_u32 s37, s37, 0 ; MUBUF-NEXT: v_mov_b32_e32 v1, 0x2000 ; MUBUF-NEXT: v_mov_b32_e32 v2, 0x4000 ; MUBUF-NEXT: v_mov_b32_e32 v3, 0 @@ -27,8 +27,8 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr ; MUBUF-NEXT: s_mov_b32 s32, 0xc0000 ; MUBUF-NEXT: s_waitcnt lgkmcnt(0) ; MUBUF-NEXT: v_mov_b32_e32 v0, s0 -; MUBUF-NEXT: s_mov_b64 s[0:1], s[48:49] -; MUBUF-NEXT: s_mov_b64 s[2:3], s[50:51] +; MUBUF-NEXT: s_mov_b64 s[0:1], s[36:37] +; MUBUF-NEXT: s_mov_b64 s[2:3], s[38:39] ; MUBUF-NEXT: s_swappc_b64 s[30:31], s[4:5] ; MUBUF-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 ; MUBUF-NEXT: s_and_saveexec_b32 s0, vcc_lo @@ -37,12 +37,12 @@ define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr ; MUBUF-NEXT: v_mov_b32_e32 v0, 0x4004 ; MUBUF-NEXT: s_mov_b32 s0, 0x41c64e6d ; MUBUF-NEXT: s_clause 0x1 -; MUBUF-NEXT: buffer_load_dword v1, v0, s[48:51], 0 offen -; MUBUF-NEXT: buffer_load_dword v2, v0, s[48:51], 0 offen offset:4 +; MUBUF-NEXT: buffer_load_dword v1, v0, s[36:39], 0 offen +; MUBUF-NEXT: buffer_load_dword v2, v0, s[36:39], 0 offen offset:4 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: v_add_nc_u32_e32 v0, v2, v1 ; MUBUF-NEXT: v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039 -; MUBUF-NEXT: buffer_store_dword v0, v0, s[48:51], 0 offen +; MUBUF-NEXT: buffer_store_dword v0, v0, s[36:39], 0 offen ; MUBUF-NEXT: .LBB0_2: ; %shader_eval_surface.exit ; MUBUF-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll index 3a078a64aa28e..4ddde7f297172 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function no_free_scratch_sgpr_for_bp_copy --version 5 +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Check that we properly realign the stack. While 4-byte access is all @@ -416,21 +416,21 @@ define void @no_free_scratch_sgpr_for_bp_copy(<32 x i32> %a, i32 %b) #0 { ; GCN-LABEL: no_free_scratch_sgpr_for_bp_copy: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s39, s34 +; GCN-NEXT: s_mov_b32 s41, s34 ; GCN-NEXT: s_mov_b32 s34, s32 ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 -; GCN-NEXT: s_mov_b32 s38, s33 +; GCN-NEXT: s_mov_b32 s40, s33 ; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000 ; GCN-NEXT: s_addk_i32 s32, 0x6000 ; GCN-NEXT: s_mov_b32 s32, s34 -; GCN-NEXT: s_mov_b32 s34, s39 +; GCN-NEXT: s_mov_b32 s34, s41 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:128 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s33, s38 +; GCN-NEXT: s_mov_b32 s33, s40 ; GCN-NEXT: s_setpc_b64 s[30:31] %local_val = alloca i32, align 128, addrspace(5) store volatile i32 %b, ptr addrspace(5) %local_val, align 128 @@ -454,78 +454,45 @@ define void @no_free_regs_spill_bp_to_memory(<32 x i32> %a, i32 %b) #5 { ; GCN-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill -; GCN-NEXT: v_mov_b32_e32 v0, s34 +; GCN-NEXT: v_writelane_b32 v39, s4, 32 +; GCN-NEXT: v_writelane_b32 v39, s34, 33 ; GCN-NEXT: s_mov_b32 s34, s32 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 ; GCN-NEXT: v_writelane_b32 v39, s39, 0 -; GCN-NEXT: v_writelane_b32 v39, s40, 1 -; GCN-NEXT: v_writelane_b32 v39, s41, 2 -; GCN-NEXT: v_writelane_b32 v39, s42, 3 -; GCN-NEXT: v_writelane_b32 v39, s43, 4 -; GCN-NEXT: v_writelane_b32 v39, s44, 5 -; GCN-NEXT: v_writelane_b32 v39, s45, 6 -; GCN-NEXT: v_writelane_b32 v39, s46, 7 -; GCN-NEXT: v_writelane_b32 v39, s47, 8 -; GCN-NEXT: v_writelane_b32 v39, s48, 9 -; GCN-NEXT: v_writelane_b32 v39, s49, 10 -; GCN-NEXT: v_writelane_b32 v39, s50, 11 -; GCN-NEXT: v_writelane_b32 v39, s51, 12 -; GCN-NEXT: v_writelane_b32 v39, s52, 13 -; GCN-NEXT: v_writelane_b32 v39, s53, 14 -; GCN-NEXT: v_writelane_b32 v39, s54, 15 -; GCN-NEXT: v_writelane_b32 v39, s55, 16 -; GCN-NEXT: v_writelane_b32 v39, s56, 17 -; GCN-NEXT: v_writelane_b32 v39, s57, 18 -; GCN-NEXT: v_writelane_b32 v39, s58, 19 -; GCN-NEXT: v_writelane_b32 v39, s59, 20 -; GCN-NEXT: v_writelane_b32 v39, s60, 21 -; GCN-NEXT: v_writelane_b32 v39, s61, 22 -; GCN-NEXT: v_writelane_b32 v39, s62, 23 -; GCN-NEXT: v_writelane_b32 v39, s63, 24 -; GCN-NEXT: v_writelane_b32 v39, s64, 25 -; GCN-NEXT: v_writelane_b32 v39, s65, 26 -; GCN-NEXT: v_writelane_b32 v39, s66, 27 -; GCN-NEXT: v_writelane_b32 v39, s67, 28 -; GCN-NEXT: v_writelane_b32 v39, s68, 29 -; GCN-NEXT: v_writelane_b32 v39, s69, 30 -; GCN-NEXT: v_writelane_b32 v39, s70, 31 -; GCN-NEXT: v_writelane_b32 v39, s71, 32 -; GCN-NEXT: v_writelane_b32 v39, s72, 33 -; GCN-NEXT: v_writelane_b32 v39, s73, 34 -; GCN-NEXT: v_writelane_b32 v39, s74, 35 -; GCN-NEXT: v_writelane_b32 v39, s75, 36 -; GCN-NEXT: v_writelane_b32 v39, s76, 37 -; GCN-NEXT: v_writelane_b32 v39, s77, 38 -; GCN-NEXT: v_writelane_b32 v39, s78, 39 -; GCN-NEXT: v_writelane_b32 v39, s79, 40 -; GCN-NEXT: v_writelane_b32 v39, s80, 41 -; GCN-NEXT: v_writelane_b32 v39, s81, 42 -; GCN-NEXT: v_writelane_b32 v39, s82, 43 -; GCN-NEXT: v_writelane_b32 v39, s83, 44 -; GCN-NEXT: v_writelane_b32 v39, s84, 45 -; GCN-NEXT: v_writelane_b32 v39, s85, 46 -; GCN-NEXT: v_writelane_b32 v39, s86, 47 -; GCN-NEXT: v_writelane_b32 v39, s87, 48 -; GCN-NEXT: v_writelane_b32 v39, s88, 49 -; GCN-NEXT: v_writelane_b32 v39, s89, 50 -; GCN-NEXT: v_writelane_b32 v39, s90, 51 -; GCN-NEXT: v_writelane_b32 v39, s91, 52 -; GCN-NEXT: v_writelane_b32 v39, s92, 53 -; GCN-NEXT: v_writelane_b32 v39, s93, 54 -; GCN-NEXT: v_writelane_b32 v39, s94, 55 -; GCN-NEXT: v_writelane_b32 v39, s95, 56 -; GCN-NEXT: v_writelane_b32 v39, s96, 57 -; GCN-NEXT: v_writelane_b32 v39, s97, 58 -; GCN-NEXT: v_writelane_b32 v39, s98, 59 -; GCN-NEXT: v_writelane_b32 v39, s99, 60 -; GCN-NEXT: v_writelane_b32 v39, s100, 61 -; GCN-NEXT: v_writelane_b32 v39, s101, 62 -; GCN-NEXT: v_writelane_b32 v39, s102, 63 +; GCN-NEXT: v_writelane_b32 v39, s48, 1 +; GCN-NEXT: v_writelane_b32 v39, s49, 2 +; GCN-NEXT: v_writelane_b32 v39, s50, 3 +; GCN-NEXT: v_writelane_b32 v39, s51, 4 +; GCN-NEXT: v_writelane_b32 v39, s52, 5 +; GCN-NEXT: v_writelane_b32 v39, s53, 6 +; GCN-NEXT: v_writelane_b32 v39, s54, 7 +; GCN-NEXT: v_writelane_b32 v39, s55, 8 +; GCN-NEXT: v_writelane_b32 v39, s64, 9 +; GCN-NEXT: v_writelane_b32 v39, s65, 10 +; GCN-NEXT: v_writelane_b32 v39, s66, 11 +; GCN-NEXT: v_writelane_b32 v39, s67, 12 +; GCN-NEXT: v_writelane_b32 v39, s68, 13 +; GCN-NEXT: v_writelane_b32 v39, s69, 14 +; GCN-NEXT: v_writelane_b32 v39, s70, 15 +; GCN-NEXT: v_writelane_b32 v39, s71, 16 +; GCN-NEXT: v_writelane_b32 v39, s80, 17 +; GCN-NEXT: v_writelane_b32 v39, s81, 18 +; GCN-NEXT: v_writelane_b32 v39, s82, 19 +; GCN-NEXT: v_writelane_b32 v39, s83, 20 +; GCN-NEXT: v_writelane_b32 v39, s84, 21 +; GCN-NEXT: v_writelane_b32 v39, s85, 22 +; GCN-NEXT: v_writelane_b32 v39, s86, 23 +; GCN-NEXT: v_writelane_b32 v39, s87, 24 +; GCN-NEXT: v_writelane_b32 v39, s96, 25 +; GCN-NEXT: v_writelane_b32 v39, s97, 26 +; GCN-NEXT: v_writelane_b32 v39, s98, 27 +; GCN-NEXT: v_writelane_b32 v39, s99, 28 +; GCN-NEXT: v_writelane_b32 v39, s100, 29 +; GCN-NEXT: v_writelane_b32 v39, s101, 30 ; GCN-NEXT: s_addk_i32 s32, 0x6000 +; GCN-NEXT: v_writelane_b32 v39, s102, 31 ; GCN-NEXT: s_mov_b32 s32, s34 +; GCN-NEXT: v_readlane_b32 s34, v39, 33 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:128 ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -535,76 +502,39 @@ define void @no_free_regs_spill_bp_to_memory(<32 x i32> %a, i32 %b) #5 { ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; clobber all VGPRs ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload -; GCN-NEXT: v_readlane_b32 s102, v39, 63 -; GCN-NEXT: v_readlane_b32 s101, v39, 62 -; GCN-NEXT: v_readlane_b32 s100, v39, 61 -; GCN-NEXT: v_readlane_b32 s99, v39, 60 -; GCN-NEXT: v_readlane_b32 s98, v39, 59 -; GCN-NEXT: v_readlane_b32 s97, v39, 58 -; GCN-NEXT: v_readlane_b32 s96, v39, 57 -; GCN-NEXT: v_readlane_b32 s95, v39, 56 -; GCN-NEXT: v_readlane_b32 s94, v39, 55 -; GCN-NEXT: v_readlane_b32 s93, v39, 54 -; GCN-NEXT: v_readlane_b32 s92, v39, 53 -; GCN-NEXT: v_readlane_b32 s91, v39, 52 -; GCN-NEXT: v_readlane_b32 s90, v39, 51 -; GCN-NEXT: v_readlane_b32 s89, v39, 50 -; GCN-NEXT: v_readlane_b32 s88, v39, 49 -; GCN-NEXT: v_readlane_b32 s87, v39, 48 -; GCN-NEXT: v_readlane_b32 s86, v39, 47 -; GCN-NEXT: v_readlane_b32 s85, v39, 46 -; GCN-NEXT: v_readlane_b32 s84, v39, 45 -; GCN-NEXT: v_readlane_b32 s83, v39, 44 -; GCN-NEXT: v_readlane_b32 s82, v39, 43 -; GCN-NEXT: v_readlane_b32 s81, v39, 42 -; GCN-NEXT: v_readlane_b32 s80, v39, 41 -; GCN-NEXT: v_readlane_b32 s79, v39, 40 -; GCN-NEXT: v_readlane_b32 s78, v39, 39 -; GCN-NEXT: v_readlane_b32 s77, v39, 38 -; GCN-NEXT: v_readlane_b32 s76, v39, 37 -; GCN-NEXT: v_readlane_b32 s75, v39, 36 -; GCN-NEXT: v_readlane_b32 s74, v39, 35 -; GCN-NEXT: v_readlane_b32 s73, v39, 34 -; GCN-NEXT: v_readlane_b32 s72, v39, 33 -; GCN-NEXT: v_readlane_b32 s71, v39, 32 -; GCN-NEXT: v_readlane_b32 s70, v39, 31 -; GCN-NEXT: v_readlane_b32 s69, v39, 30 -; GCN-NEXT: v_readlane_b32 s68, v39, 29 -; GCN-NEXT: v_readlane_b32 s67, v39, 28 -; GCN-NEXT: v_readlane_b32 s66, v39, 27 -; GCN-NEXT: v_readlane_b32 s65, v39, 26 -; GCN-NEXT: v_readlane_b32 s64, v39, 25 -; GCN-NEXT: v_readlane_b32 s63, v39, 24 -; GCN-NEXT: v_readlane_b32 s62, v39, 23 -; GCN-NEXT: v_readlane_b32 s61, v39, 22 -; GCN-NEXT: v_readlane_b32 s60, v39, 21 -; GCN-NEXT: v_readlane_b32 s59, v39, 20 -; GCN-NEXT: v_readlane_b32 s58, v39, 19 -; GCN-NEXT: v_readlane_b32 s57, v39, 18 -; GCN-NEXT: v_readlane_b32 s56, v39, 17 -; GCN-NEXT: v_readlane_b32 s55, v39, 16 -; GCN-NEXT: v_readlane_b32 s54, v39, 15 -; GCN-NEXT: v_readlane_b32 s53, v39, 14 -; GCN-NEXT: v_readlane_b32 s52, v39, 13 -; GCN-NEXT: v_readlane_b32 s51, v39, 12 -; GCN-NEXT: v_readlane_b32 s50, v39, 11 -; GCN-NEXT: v_readlane_b32 s49, v39, 10 -; GCN-NEXT: v_readlane_b32 s48, v39, 9 -; GCN-NEXT: v_readlane_b32 s47, v39, 8 -; GCN-NEXT: v_readlane_b32 s46, v39, 7 -; GCN-NEXT: v_readlane_b32 s45, v39, 6 -; GCN-NEXT: v_readlane_b32 s44, v39, 5 -; GCN-NEXT: v_readlane_b32 s43, v39, 4 -; GCN-NEXT: v_readlane_b32 s42, v39, 3 -; GCN-NEXT: v_readlane_b32 s41, v39, 2 -; GCN-NEXT: v_readlane_b32 s40, v39, 1 +; GCN-NEXT: v_readlane_b32 s102, v39, 31 +; GCN-NEXT: v_readlane_b32 s101, v39, 30 +; GCN-NEXT: v_readlane_b32 s100, v39, 29 +; GCN-NEXT: v_readlane_b32 s99, v39, 28 +; GCN-NEXT: v_readlane_b32 s98, v39, 27 +; GCN-NEXT: v_readlane_b32 s97, v39, 26 +; GCN-NEXT: v_readlane_b32 s96, v39, 25 +; GCN-NEXT: v_readlane_b32 s87, v39, 24 +; GCN-NEXT: v_readlane_b32 s86, v39, 23 +; GCN-NEXT: v_readlane_b32 s85, v39, 22 +; GCN-NEXT: v_readlane_b32 s84, v39, 21 +; GCN-NEXT: v_readlane_b32 s83, v39, 20 +; GCN-NEXT: v_readlane_b32 s82, v39, 19 +; GCN-NEXT: v_readlane_b32 s81, v39, 18 +; GCN-NEXT: v_readlane_b32 s80, v39, 17 +; GCN-NEXT: v_readlane_b32 s71, v39, 16 +; GCN-NEXT: v_readlane_b32 s70, v39, 15 +; GCN-NEXT: v_readlane_b32 s69, v39, 14 +; GCN-NEXT: v_readlane_b32 s68, v39, 13 +; GCN-NEXT: v_readlane_b32 s67, v39, 12 +; GCN-NEXT: v_readlane_b32 s66, v39, 11 +; GCN-NEXT: v_readlane_b32 s65, v39, 10 +; GCN-NEXT: v_readlane_b32 s64, v39, 9 +; GCN-NEXT: v_readlane_b32 s55, v39, 8 +; GCN-NEXT: v_readlane_b32 s54, v39, 7 +; GCN-NEXT: v_readlane_b32 s53, v39, 6 +; GCN-NEXT: v_readlane_b32 s52, v39, 5 +; GCN-NEXT: v_readlane_b32 s51, v39, 4 +; GCN-NEXT: v_readlane_b32 s50, v39, 3 +; GCN-NEXT: v_readlane_b32 s49, v39, 2 +; GCN-NEXT: v_readlane_b32 s48, v39, 1 ; GCN-NEXT: v_readlane_b32 s39, v39, 0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readfirstlane_b32 s4, v0 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readfirstlane_b32 s34, v0 +; GCN-NEXT: v_readlane_b32 s4, v39, 32 ; GCN-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v39, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[6:7] @@ -647,81 +577,46 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i ; GCN-NEXT: s_add_i32 s5, s33, 0x42100 ; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s5 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: s_add_i32 s5, s33, 0x42200 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill -; GCN-NEXT: v_mov_b32_e32 v0, s34 -; GCN-NEXT: s_add_i32 s5, s33, 0x42300 +; GCN-NEXT: v_writelane_b32 v39, s4, 32 +; GCN-NEXT: v_writelane_b32 v39, s34, 33 ; GCN-NEXT: s_mov_b32 s34, s32 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s34 offset:4 ; GCN-NEXT: v_writelane_b32 v39, s39, 0 -; GCN-NEXT: v_writelane_b32 v39, s40, 1 -; GCN-NEXT: v_writelane_b32 v39, s41, 2 -; GCN-NEXT: v_writelane_b32 v39, s42, 3 -; GCN-NEXT: v_writelane_b32 v39, s43, 4 -; GCN-NEXT: v_writelane_b32 v39, s44, 5 -; GCN-NEXT: v_writelane_b32 v39, s45, 6 -; GCN-NEXT: v_writelane_b32 v39, s46, 7 -; GCN-NEXT: v_writelane_b32 v39, s47, 8 -; GCN-NEXT: v_writelane_b32 v39, s48, 9 -; GCN-NEXT: v_writelane_b32 v39, s49, 10 -; GCN-NEXT: v_writelane_b32 v39, s50, 11 -; GCN-NEXT: v_writelane_b32 v39, s51, 12 -; GCN-NEXT: v_writelane_b32 v39, s52, 13 -; GCN-NEXT: v_writelane_b32 v39, s53, 14 -; GCN-NEXT: v_writelane_b32 v39, s54, 15 -; GCN-NEXT: v_writelane_b32 v39, s55, 16 -; GCN-NEXT: v_writelane_b32 v39, s56, 17 -; GCN-NEXT: v_writelane_b32 v39, s57, 18 -; GCN-NEXT: v_writelane_b32 v39, s58, 19 -; GCN-NEXT: v_writelane_b32 v39, s59, 20 -; GCN-NEXT: v_writelane_b32 v39, s60, 21 -; GCN-NEXT: v_writelane_b32 v39, s61, 22 -; GCN-NEXT: v_writelane_b32 v39, s62, 23 -; GCN-NEXT: v_writelane_b32 v39, s63, 24 -; GCN-NEXT: v_writelane_b32 v39, s64, 25 -; GCN-NEXT: v_writelane_b32 v39, s65, 26 -; GCN-NEXT: v_writelane_b32 v39, s66, 27 -; GCN-NEXT: v_writelane_b32 v39, s67, 28 -; GCN-NEXT: v_writelane_b32 v39, s68, 29 -; GCN-NEXT: v_writelane_b32 v39, s69, 30 -; GCN-NEXT: v_writelane_b32 v39, s70, 31 -; GCN-NEXT: v_writelane_b32 v39, s71, 32 -; GCN-NEXT: v_writelane_b32 v39, s72, 33 -; GCN-NEXT: v_writelane_b32 v39, s73, 34 -; GCN-NEXT: v_writelane_b32 v39, s74, 35 -; GCN-NEXT: v_writelane_b32 v39, s75, 36 -; GCN-NEXT: v_writelane_b32 v39, s76, 37 -; GCN-NEXT: v_writelane_b32 v39, s77, 38 -; GCN-NEXT: v_writelane_b32 v39, s78, 39 -; GCN-NEXT: v_writelane_b32 v39, s79, 40 -; GCN-NEXT: v_writelane_b32 v39, s80, 41 -; GCN-NEXT: v_writelane_b32 v39, s81, 42 -; GCN-NEXT: v_writelane_b32 v39, s82, 43 -; GCN-NEXT: v_writelane_b32 v39, s83, 44 -; GCN-NEXT: v_writelane_b32 v39, s84, 45 -; GCN-NEXT: v_writelane_b32 v39, s85, 46 -; GCN-NEXT: v_writelane_b32 v39, s86, 47 -; GCN-NEXT: v_writelane_b32 v39, s87, 48 -; GCN-NEXT: v_writelane_b32 v39, s88, 49 -; GCN-NEXT: v_writelane_b32 v39, s89, 50 -; GCN-NEXT: v_writelane_b32 v39, s90, 51 -; GCN-NEXT: v_writelane_b32 v39, s91, 52 -; GCN-NEXT: v_writelane_b32 v39, s92, 53 -; GCN-NEXT: v_writelane_b32 v39, s93, 54 -; GCN-NEXT: v_writelane_b32 v39, s94, 55 -; GCN-NEXT: v_writelane_b32 v39, s95, 56 -; GCN-NEXT: v_writelane_b32 v39, s96, 57 -; GCN-NEXT: v_writelane_b32 v39, s97, 58 -; GCN-NEXT: v_writelane_b32 v39, s98, 59 -; GCN-NEXT: v_writelane_b32 v39, s99, 60 -; GCN-NEXT: v_writelane_b32 v39, s100, 61 -; GCN-NEXT: v_writelane_b32 v39, s101, 62 +; GCN-NEXT: v_writelane_b32 v39, s48, 1 +; GCN-NEXT: v_writelane_b32 v39, s49, 2 +; GCN-NEXT: v_writelane_b32 v39, s50, 3 +; GCN-NEXT: v_writelane_b32 v39, s51, 4 +; GCN-NEXT: v_writelane_b32 v39, s52, 5 +; GCN-NEXT: v_writelane_b32 v39, s53, 6 +; GCN-NEXT: v_writelane_b32 v39, s54, 7 +; GCN-NEXT: v_writelane_b32 v39, s55, 8 +; GCN-NEXT: v_writelane_b32 v39, s64, 9 +; GCN-NEXT: v_writelane_b32 v39, s65, 10 +; GCN-NEXT: v_writelane_b32 v39, s66, 11 +; GCN-NEXT: v_writelane_b32 v39, s67, 12 +; GCN-NEXT: v_writelane_b32 v39, s68, 13 +; GCN-NEXT: v_writelane_b32 v39, s69, 14 +; GCN-NEXT: v_writelane_b32 v39, s70, 15 +; GCN-NEXT: v_writelane_b32 v39, s71, 16 +; GCN-NEXT: v_writelane_b32 v39, s80, 17 +; GCN-NEXT: v_writelane_b32 v39, s81, 18 +; GCN-NEXT: v_writelane_b32 v39, s82, 19 +; GCN-NEXT: v_writelane_b32 v39, s83, 20 +; GCN-NEXT: v_writelane_b32 v39, s84, 21 +; GCN-NEXT: v_writelane_b32 v39, s85, 22 +; GCN-NEXT: v_writelane_b32 v39, s86, 23 +; GCN-NEXT: v_writelane_b32 v39, s87, 24 +; GCN-NEXT: v_writelane_b32 v39, s96, 25 +; GCN-NEXT: v_writelane_b32 v39, s97, 26 +; GCN-NEXT: v_writelane_b32 v39, s98, 27 +; GCN-NEXT: v_writelane_b32 v39, s99, 28 +; GCN-NEXT: v_writelane_b32 v39, s100, 29 +; GCN-NEXT: v_writelane_b32 v39, s101, 30 ; GCN-NEXT: v_mov_b32_e32 v1, 0x1080 -; GCN-NEXT: v_writelane_b32 v39, s102, 63 ; GCN-NEXT: s_add_i32 s32, s32, 0x46000 +; GCN-NEXT: v_writelane_b32 v39, s102, 31 ; GCN-NEXT: s_mov_b32 s32, s34 +; GCN-NEXT: v_readlane_b32 s34, v39, 33 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s33 offen ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -731,78 +626,39 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; clobber all VGPRs ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_add_i32 s5, s33, 0x42200 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload -; GCN-NEXT: s_add_i32 s5, s33, 0x42300 -; GCN-NEXT: v_readlane_b32 s102, v39, 63 -; GCN-NEXT: v_readlane_b32 s101, v39, 62 -; GCN-NEXT: v_readlane_b32 s100, v39, 61 -; GCN-NEXT: v_readlane_b32 s99, v39, 60 -; GCN-NEXT: v_readlane_b32 s98, v39, 59 -; GCN-NEXT: v_readlane_b32 s97, v39, 58 -; GCN-NEXT: v_readlane_b32 s96, v39, 57 -; GCN-NEXT: v_readlane_b32 s95, v39, 56 -; GCN-NEXT: v_readlane_b32 s94, v39, 55 -; GCN-NEXT: v_readlane_b32 s93, v39, 54 -; GCN-NEXT: v_readlane_b32 s92, v39, 53 -; GCN-NEXT: v_readlane_b32 s91, v39, 52 -; GCN-NEXT: v_readlane_b32 s90, v39, 51 -; GCN-NEXT: v_readlane_b32 s89, v39, 50 -; GCN-NEXT: v_readlane_b32 s88, v39, 49 -; GCN-NEXT: v_readlane_b32 s87, v39, 48 -; GCN-NEXT: v_readlane_b32 s86, v39, 47 -; GCN-NEXT: v_readlane_b32 s85, v39, 46 -; GCN-NEXT: v_readlane_b32 s84, v39, 45 -; GCN-NEXT: v_readlane_b32 s83, v39, 44 -; GCN-NEXT: v_readlane_b32 s82, v39, 43 -; GCN-NEXT: v_readlane_b32 s81, v39, 42 -; GCN-NEXT: v_readlane_b32 s80, v39, 41 -; GCN-NEXT: v_readlane_b32 s79, v39, 40 -; GCN-NEXT: v_readlane_b32 s78, v39, 39 -; GCN-NEXT: v_readlane_b32 s77, v39, 38 -; GCN-NEXT: v_readlane_b32 s76, v39, 37 -; GCN-NEXT: v_readlane_b32 s75, v39, 36 -; GCN-NEXT: v_readlane_b32 s74, v39, 35 -; GCN-NEXT: v_readlane_b32 s73, v39, 34 -; GCN-NEXT: v_readlane_b32 s72, v39, 33 -; GCN-NEXT: v_readlane_b32 s71, v39, 32 -; GCN-NEXT: v_readlane_b32 s70, v39, 31 -; GCN-NEXT: v_readlane_b32 s69, v39, 30 -; GCN-NEXT: v_readlane_b32 s68, v39, 29 -; GCN-NEXT: v_readlane_b32 s67, v39, 28 -; GCN-NEXT: v_readlane_b32 s66, v39, 27 -; GCN-NEXT: v_readlane_b32 s65, v39, 26 -; GCN-NEXT: v_readlane_b32 s64, v39, 25 -; GCN-NEXT: v_readlane_b32 s63, v39, 24 -; GCN-NEXT: v_readlane_b32 s62, v39, 23 -; GCN-NEXT: v_readlane_b32 s61, v39, 22 -; GCN-NEXT: v_readlane_b32 s60, v39, 21 -; GCN-NEXT: v_readlane_b32 s59, v39, 20 -; GCN-NEXT: v_readlane_b32 s58, v39, 19 -; GCN-NEXT: v_readlane_b32 s57, v39, 18 -; GCN-NEXT: v_readlane_b32 s56, v39, 17 -; GCN-NEXT: v_readlane_b32 s55, v39, 16 -; GCN-NEXT: v_readlane_b32 s54, v39, 15 -; GCN-NEXT: v_readlane_b32 s53, v39, 14 -; GCN-NEXT: v_readlane_b32 s52, v39, 13 -; GCN-NEXT: v_readlane_b32 s51, v39, 12 -; GCN-NEXT: v_readlane_b32 s50, v39, 11 -; GCN-NEXT: v_readlane_b32 s49, v39, 10 -; GCN-NEXT: v_readlane_b32 s48, v39, 9 -; GCN-NEXT: v_readlane_b32 s47, v39, 8 -; GCN-NEXT: v_readlane_b32 s46, v39, 7 -; GCN-NEXT: v_readlane_b32 s45, v39, 6 -; GCN-NEXT: v_readlane_b32 s44, v39, 5 -; GCN-NEXT: v_readlane_b32 s43, v39, 4 -; GCN-NEXT: v_readlane_b32 s42, v39, 3 -; GCN-NEXT: v_readlane_b32 s41, v39, 2 -; GCN-NEXT: v_readlane_b32 s40, v39, 1 +; GCN-NEXT: v_readlane_b32 s102, v39, 31 +; GCN-NEXT: v_readlane_b32 s101, v39, 30 +; GCN-NEXT: v_readlane_b32 s100, v39, 29 +; GCN-NEXT: v_readlane_b32 s99, v39, 28 +; GCN-NEXT: v_readlane_b32 s98, v39, 27 +; GCN-NEXT: v_readlane_b32 s97, v39, 26 +; GCN-NEXT: v_readlane_b32 s96, v39, 25 +; GCN-NEXT: v_readlane_b32 s87, v39, 24 +; GCN-NEXT: v_readlane_b32 s86, v39, 23 +; GCN-NEXT: v_readlane_b32 s85, v39, 22 +; GCN-NEXT: v_readlane_b32 s84, v39, 21 +; GCN-NEXT: v_readlane_b32 s83, v39, 20 +; GCN-NEXT: v_readlane_b32 s82, v39, 19 +; GCN-NEXT: v_readlane_b32 s81, v39, 18 +; GCN-NEXT: v_readlane_b32 s80, v39, 17 +; GCN-NEXT: v_readlane_b32 s71, v39, 16 +; GCN-NEXT: v_readlane_b32 s70, v39, 15 +; GCN-NEXT: v_readlane_b32 s69, v39, 14 +; GCN-NEXT: v_readlane_b32 s68, v39, 13 +; GCN-NEXT: v_readlane_b32 s67, v39, 12 +; GCN-NEXT: v_readlane_b32 s66, v39, 11 +; GCN-NEXT: v_readlane_b32 s65, v39, 10 +; GCN-NEXT: v_readlane_b32 s64, v39, 9 +; GCN-NEXT: v_readlane_b32 s55, v39, 8 +; GCN-NEXT: v_readlane_b32 s54, v39, 7 +; GCN-NEXT: v_readlane_b32 s53, v39, 6 +; GCN-NEXT: v_readlane_b32 s52, v39, 5 +; GCN-NEXT: v_readlane_b32 s51, v39, 4 +; GCN-NEXT: v_readlane_b32 s50, v39, 3 +; GCN-NEXT: v_readlane_b32 s49, v39, 2 +; GCN-NEXT: v_readlane_b32 s48, v39, 1 ; GCN-NEXT: v_readlane_b32 s39, v39, 0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readfirstlane_b32 s4, v0 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readfirstlane_b32 s34, v0 +; GCN-NEXT: v_readlane_b32 s4, v39, 32 ; GCN-NEXT: s_xor_saveexec_b64 s[6:7], -1 ; GCN-NEXT: s_add_i32 s5, s33, 0x42100 ; GCN-NEXT: buffer_load_dword v39, off, s[0:3], s5 ; 4-byte Folded Reload diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll index f7300c921a745..04da358a49bc3 100644 --- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll +++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll @@ -32,14 +32,14 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-LABEL: kernel: ; GLOBALNESS1: ; %bb.0: ; %bb ; GLOBALNESS1-NEXT: s_mov_b64 s[36:37], s[6:7] -; GLOBALNESS1-NEXT: s_load_dwordx4 s[96:99], s[8:9], 0x0 +; GLOBALNESS1-NEXT: s_load_dwordx4 s[52:55], s[8:9], 0x0 ; GLOBALNESS1-NEXT: s_load_dword s6, s[8:9], 0x14 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v42, 0 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 ; GLOBALNESS1-NEXT: global_store_dword v[0:1], v42, off ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[96:97] +; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[52:53] ; GLOBALNESS1-NEXT: s_mov_b64 s[48:49], s[4:5] ; GLOBALNESS1-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18 ; GLOBALNESS1-NEXT: s_load_dword s7, s[8:9], 0x20 @@ -49,7 +49,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS1-NEXT: s_addc_u32 s1, s1, 0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x40994400 -; GLOBALNESS1-NEXT: s_bitcmp1_b32 s98, 0 +; GLOBALNESS1-NEXT: s_bitcmp1_b32 s54, 0 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e32 vcc, s[4:5], v[0:1] ; GLOBALNESS1-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[4:5], 0 @@ -68,7 +68,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GLOBALNESS1-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GLOBALNESS1-NEXT: s_mov_b64 s[46:47], s[8:9] +; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[8:9] ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1 ; GLOBALNESS1-NEXT: ; implicit-def: $vgpr56 : SGPR spill to VGPR lane ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0 @@ -76,7 +76,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_writelane_b32 v56, s8, 0 ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[68:69], 1, v0 ; GLOBALNESS1-NEXT: v_writelane_b32 v56, s9, 1 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[62:63], 1, v3 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[70:71], 1, v3 ; GLOBALNESS1-NEXT: s_mov_b32 s82, s16 ; GLOBALNESS1-NEXT: s_mov_b32 s83, s15 ; GLOBALNESS1-NEXT: s_mov_b32 s84, s14 @@ -102,8 +102,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_writelane_b32 v56, s4, 6 ; GLOBALNESS1-NEXT: v_writelane_b32 v56, s5, 7 ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[80:81], 1, v1 -; GLOBALNESS1-NEXT: v_writelane_b32 v56, s62, 8 -; GLOBALNESS1-NEXT: v_writelane_b32 v56, s63, 9 +; GLOBALNESS1-NEXT: v_writelane_b32 v56, s70, 8 +; GLOBALNESS1-NEXT: v_writelane_b32 v56, s71, 9 ; GLOBALNESS1-NEXT: s_branch .LBB1_4 ; GLOBALNESS1-NEXT: .LBB1_1: ; %bb70.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 @@ -127,10 +127,10 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0x80 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0 ; GLOBALNESS1-NEXT: flat_load_dword v40, v[0:1] -; GLOBALNESS1-NEXT: s_add_u32 s8, s46, 40 +; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 ; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0 ; GLOBALNESS1-NEXT: flat_load_dword v46, v[0:1] -; GLOBALNESS1-NEXT: s_addc_u32 s9, s47, 0 +; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 @@ -144,17 +144,17 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[62:63] +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[70:71] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1 ; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_9 ; GLOBALNESS1-NEXT: ; %bb.5: ; %NodeBlock ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_cmp_lt_i32 s99, 1 +; GLOBALNESS1-NEXT: s_cmp_lt_i32 s55, 1 ; GLOBALNESS1-NEXT: s_cbranch_scc1 .LBB1_7 ; GLOBALNESS1-NEXT: ; %bb.6: ; %LeafBlock12 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_cmp_lg_u32 s99, 1 +; GLOBALNESS1-NEXT: s_cmp_lg_u32 s55, 1 ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_8 @@ -164,7 +164,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS1-NEXT: .LBB1_8: ; %LeafBlock ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_cmp_lg_u32 s99, 0 +; GLOBALNESS1-NEXT: s_cmp_lg_u32 s55, 0 ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0 ; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS1-NEXT: .LBB1_9: ; %Flow25 @@ -176,10 +176,10 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 ; GLOBALNESS1-NEXT: flat_load_dword v0, v[2:3] ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[94:95], 0, v0 +; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[86:87], 0, v0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x3ff00000 -; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[52:53], s[94:95] +; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[52:53], s[86:87] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26 ; GLOBALNESS1-NEXT: ; %bb.11: ; %bb33.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 @@ -238,33 +238,33 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS1-NEXT: ; %bb.22: ; %bb55.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: s_add_u32 s78, s46, 40 -; GLOBALNESS1-NEXT: s_addc_u32 s79, s47, 0 +; GLOBALNESS1-NEXT: s_add_u32 s70, s38, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s71, s39, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 -; GLOBALNESS1-NEXT: s_load_dwordx2 s[62:63], s[4:5], 0x0 +; GLOBALNESS1-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0 ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] -; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[78:79] +; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[70:71] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS1-NEXT: s_mov_b32 s12, s84 ; GLOBALNESS1-NEXT: s_mov_b32 s13, s83 ; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[62:63] +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[54:55] ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[46:47], 0, 0 ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] -; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[78:79] +; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[70:71] ; GLOBALNESS1-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS1-NEXT: s_mov_b32 s12, s84 ; GLOBALNESS1-NEXT: s_mov_b32 s13, s83 ; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: global_store_dwordx2 v[46:47], v[44:45], off -; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[62:63] +; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[54:55] ; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[96:97] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_14 ; GLOBALNESS1-NEXT: ; %bb.23: ; %bb62.i @@ -278,14 +278,16 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_branch .LBB1_3 ; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow23 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_load_dwordx4 s[96:99], s[46:47], 0x0 -; GLOBALNESS1-NEXT: v_readlane_b32 s62, v56, 8 +; GLOBALNESS1-NEXT: s_load_dwordx4 s[4:7], s[38:39], 0x0 +; GLOBALNESS1-NEXT: v_readlane_b32 s70, v56, 8 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS1-NEXT: v_readlane_b32 s63, v56, 9 +; GLOBALNESS1-NEXT: v_readlane_b32 s71, v56, 9 +; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) +; GLOBALNESS1-NEXT: s_mov_b32 s55, s7 ; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow24 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[52:53] -; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[94:95] +; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[86:87] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2 ; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 @@ -310,8 +312,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_32 ; GLOBALNESS1-NEXT: ; %bb.31: ; %bb7.i.i -; GLOBALNESS1-NEXT: s_add_u32 s8, s46, 40 -; GLOBALNESS1-NEXT: s_addc_u32 s9, s47, 0 +; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17] ; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 @@ -328,8 +330,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_34 ; GLOBALNESS1-NEXT: ; %bb.33: ; %bb11.i.i -; GLOBALNESS1-NEXT: s_add_u32 s8, s46, 40 -; GLOBALNESS1-NEXT: s_addc_u32 s9, s47, 0 +; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17] ; GLOBALNESS1-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 ; GLOBALNESS1-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 @@ -346,14 +348,14 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-LABEL: kernel: ; GLOBALNESS0: ; %bb.0: ; %bb ; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[6:7] -; GLOBALNESS0-NEXT: s_load_dwordx4 s[96:99], s[8:9], 0x0 +; GLOBALNESS0-NEXT: s_load_dwordx4 s[52:55], s[8:9], 0x0 ; GLOBALNESS0-NEXT: s_load_dword s6, s[8:9], 0x14 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v42, 0 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 ; GLOBALNESS0-NEXT: global_store_dword v[0:1], v42, off ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[96:97] +; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[52:53] ; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[4:5] ; GLOBALNESS0-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18 ; GLOBALNESS0-NEXT: s_load_dword s7, s[8:9], 0x20 @@ -363,7 +365,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS0-NEXT: s_addc_u32 s1, s1, 0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x40994400 -; GLOBALNESS0-NEXT: s_bitcmp1_b32 s98, 0 +; GLOBALNESS0-NEXT: s_bitcmp1_b32 s54, 0 ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e32 vcc, s[4:5], v[0:1] ; GLOBALNESS0-NEXT: v_cmp_ngt_f64_e64 s[4:5], s[4:5], 0 @@ -382,7 +384,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] ; GLOBALNESS0-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1 -; GLOBALNESS0-NEXT: s_mov_b64 s[46:47], s[8:9] +; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[8:9] ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1 ; GLOBALNESS0-NEXT: ; implicit-def: $vgpr56 : SGPR spill to VGPR lane ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0 @@ -390,9 +392,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_writelane_b32 v56, s8, 0 ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[68:69], 1, v0 ; GLOBALNESS0-NEXT: v_writelane_b32 v56, s9, 1 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[62:63], 1, v3 -; GLOBALNESS0-NEXT: s_mov_b32 s78, s16 -; GLOBALNESS0-NEXT: s_mov_b32 s79, s15 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[84:85], 1, v3 +; GLOBALNESS0-NEXT: s_mov_b32 s70, s16 +; GLOBALNESS0-NEXT: s_mov_b32 s71, s15 ; GLOBALNESS0-NEXT: s_mov_b32 s82, s14 ; GLOBALNESS0-NEXT: s_mov_b64 s[34:35], s[10:11] ; GLOBALNESS0-NEXT: s_mov_b32 s32, 0 @@ -416,8 +418,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_writelane_b32 v56, s4, 6 ; GLOBALNESS0-NEXT: v_writelane_b32 v56, s5, 7 ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[80:81], 1, v1 -; GLOBALNESS0-NEXT: v_writelane_b32 v56, s62, 8 -; GLOBALNESS0-NEXT: v_writelane_b32 v56, s63, 9 +; GLOBALNESS0-NEXT: v_writelane_b32 v56, s84, 8 +; GLOBALNESS0-NEXT: v_writelane_b32 v56, s85, 9 ; GLOBALNESS0-NEXT: s_branch .LBB1_4 ; GLOBALNESS0-NEXT: .LBB1_1: ; %bb70.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 @@ -441,10 +443,10 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0x80 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0 ; GLOBALNESS0-NEXT: flat_load_dword v40, v[0:1] -; GLOBALNESS0-NEXT: s_add_u32 s8, s46, 40 +; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 ; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0 ; GLOBALNESS0-NEXT: flat_load_dword v46, v[0:1] -; GLOBALNESS0-NEXT: s_addc_u32 s9, s47, 0 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 @@ -453,22 +455,22 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s79 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s78 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s71 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[62:63] +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[84:85] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1 ; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_9 ; GLOBALNESS0-NEXT: ; %bb.5: ; %NodeBlock ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_cmp_lt_i32 s99, 1 +; GLOBALNESS0-NEXT: s_cmp_lt_i32 s55, 1 ; GLOBALNESS0-NEXT: s_cbranch_scc1 .LBB1_7 ; GLOBALNESS0-NEXT: ; %bb.6: ; %LeafBlock12 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_cmp_lg_u32 s99, 1 +; GLOBALNESS0-NEXT: s_cmp_lg_u32 s55, 1 ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_8 @@ -478,7 +480,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS0-NEXT: .LBB1_8: ; %LeafBlock ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_cmp_lg_u32 s99, 0 +; GLOBALNESS0-NEXT: s_cmp_lg_u32 s55, 0 ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0 ; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS0-NEXT: .LBB1_9: ; %Flow25 @@ -490,17 +492,17 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 ; GLOBALNESS0-NEXT: flat_load_dword v0, v[2:3] ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[94:95], 0, v0 +; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[86:87], 0, v0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x3ff00000 -; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[52:53], s[94:95] +; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[52:53], s[86:87] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26 ; GLOBALNESS0-NEXT: ; %bb.11: ; %bb33.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[2:3], off ; GLOBALNESS0-NEXT: v_readlane_b32 s4, v56, 2 ; GLOBALNESS0-NEXT: v_readlane_b32 s5, v56, 3 -; GLOBALNESS0-NEXT: s_mov_b32 s83, s99 +; GLOBALNESS0-NEXT: s_mov_b32 s83, s55 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[4:5] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_13 ; GLOBALNESS0-NEXT: ; %bb.12: ; %bb39.i @@ -553,33 +555,33 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_15 ; GLOBALNESS0-NEXT: ; %bb.22: ; %bb55.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: s_add_u32 s84, s46, 40 -; GLOBALNESS0-NEXT: s_addc_u32 s85, s47, 0 +; GLOBALNESS0-NEXT: s_add_u32 s84, s38, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s85, s39, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s5, s5, wobble@gotpcrel32@hi+12 -; GLOBALNESS0-NEXT: s_load_dwordx2 s[62:63], s[4:5], 0x0 +; GLOBALNESS0-NEXT: s_load_dwordx2 s[54:55], s[4:5], 0x0 ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s79 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s78 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s71 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) -; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[62:63] +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[54:55] ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[46:47], 0, 0 ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s79 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s78 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s71 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: global_store_dwordx2 v[46:47], v[44:45], off -; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[62:63] +; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[54:55] ; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[96:97] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_14 ; GLOBALNESS0-NEXT: ; %bb.23: ; %bb62.i @@ -593,14 +595,14 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_branch .LBB1_3 ; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow23 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_readlane_b32 s62, v56, 8 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v56, 8 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS0-NEXT: s_mov_b32 s99, s83 -; GLOBALNESS0-NEXT: v_readlane_b32 s63, v56, 9 +; GLOBALNESS0-NEXT: s_mov_b32 s55, s83 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v56, 9 ; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow24 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[52:53] -; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[94:95] +; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[86:87] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_2 ; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 @@ -625,8 +627,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1 ; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_32 ; GLOBALNESS0-NEXT: ; %bb.31: ; %bb7.i.i -; GLOBALNESS0-NEXT: s_add_u32 s8, s46, 40 -; GLOBALNESS0-NEXT: s_addc_u32 s9, s47, 0 +; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17] ; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 @@ -634,8 +636,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s79 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s78 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s71 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0 @@ -643,8 +645,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_34 ; GLOBALNESS0-NEXT: ; %bb.33: ; %bb11.i.i -; GLOBALNESS0-NEXT: s_add_u32 s8, s46, 40 -; GLOBALNESS0-NEXT: s_addc_u32 s9, s47, 0 +; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 +; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17] ; GLOBALNESS0-NEXT: s_add_u32 s16, s16, widget@rel32@lo+4 ; GLOBALNESS0-NEXT: s_addc_u32 s17, s17, widget@rel32@hi+12 @@ -652,8 +654,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[10:11], s[34:35] ; GLOBALNESS0-NEXT: s_mov_b32 s12, s82 -; GLOBALNESS0-NEXT: s_mov_b32 s13, s79 -; GLOBALNESS0-NEXT: s_mov_b32 s14, s78 +; GLOBALNESS0-NEXT: s_mov_b32 s13, s71 +; GLOBALNESS0-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GLOBALNESS0-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock diff --git a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir index edb1f74d738f5..0df2e651a15e1 100644 --- a/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir +++ b/llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir @@ -27,11 +27,11 @@ body: | ; CHECK-NEXT: renamable $sgpr4 = COPY $sgpr0 ; CHECK-NEXT: SI_SPILL_S128_SAVE $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: renamable $sgpr5 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr88 = COPY renamable $sgpr5 - ; CHECK-NEXT: renamable $sgpr89 = COPY renamable $sgpr5 - ; CHECK-NEXT: renamable $sgpr90 = COPY renamable $sgpr5 + ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr5 + ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr5 + ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr5 ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1056964608 - ; CHECK-NEXT: renamable $sgpr91 = COPY renamable $sgpr5 + ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr5 ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr0 ; CHECK-NEXT: renamable $sgpr8 = COPY renamable $sgpr5 ; CHECK-NEXT: renamable $sgpr9 = COPY renamable $sgpr5 @@ -43,46 +43,46 @@ body: | ; CHECK-NEXT: renamable $sgpr15 = COPY renamable $sgpr5 ; CHECK-NEXT: renamable $vgpr5_vgpr6 = COPY killed renamable $sgpr0_sgpr1 ; CHECK-NEXT: renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1088, 0 :: (dereferenceable load (s256), addrspace 6) - ; CHECK-NEXT: renamable $sgpr76_sgpr77_sgpr78_sgpr79 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6) + ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX4_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6) ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1200 ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5 - ; CHECK-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0 :: (dereferenceable load (s256), addrspace 6) - ; CHECK-NEXT: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) + ; CHECK-NEXT: renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1152, 0 :: (dereferenceable load (s256), addrspace 6) + ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71 = S_LOAD_DWORDX4_IMM renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) ; CHECK-NEXT: KILL killed renamable $sgpr0, renamable $sgpr1 ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1264 ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5 - ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0 :: (dereferenceable load (s256), addrspace 6) - ; CHECK-NEXT: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) + ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1216, 0 :: (dereferenceable load (s256), addrspace 6) + ; CHECK-NEXT: renamable $sgpr80_sgpr81_sgpr82_sgpr83 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1328 ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5 - ; CHECK-NEXT: renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0 :: (dereferenceable load (s256), addrspace 6) - ; CHECK-NEXT: renamable $sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) - ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0 :: (dereferenceable load (s256), addrspace 6) + ; CHECK-NEXT: renamable $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1280, 0 :: (dereferenceable load (s256), addrspace 6) + ; CHECK-NEXT: renamable $sgpr84_sgpr85_sgpr86_sgpr87 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) + ; CHECK-NEXT: renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1344, 0 :: (dereferenceable load (s256), addrspace 6) ; CHECK-NEXT: renamable $sgpr0 = S_MOV_B32 1392 ; CHECK-NEXT: renamable $sgpr1 = COPY renamable $sgpr5 - ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s256), addrspace 6) + ; CHECK-NEXT: renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 0, 0 :: (load (s256), addrspace 6) ; CHECK-NEXT: renamable $sgpr2 = S_MOV_B32 1456 ; CHECK-NEXT: renamable $sgpr3 = COPY renamable $sgpr5 - ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1472, 0 :: (dereferenceable load (s256), addrspace 6) + ; CHECK-NEXT: renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 = S_LOAD_DWORDX8_IMM renamable $sgpr4_sgpr5, 1472, 0 :: (dereferenceable load (s256), addrspace 6) ; CHECK-NEXT: renamable $sgpr4 = S_MOV_B32 1520 ; CHECK-NEXT: renamable $sgpr96_sgpr97_sgpr98_sgpr99 = S_LOAD_DWORDX4_IMM killed renamable $sgpr2_sgpr3, 0, 0 :: (load (s128), addrspace 6) ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (load (s128), addrspace 6) ; CHECK-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr0_sgpr1, 0, 0 :: (load (s128), addrspace 6) - ; CHECK-NEXT: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) - ; CHECK-NEXT: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr76_sgpr77_sgpr78_sgpr79, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) - ; CHECK-NEXT: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) - ; CHECK-NEXT: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) - ; CHECK-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, killed renamable $sgpr92_sgpr93_sgpr94_sgpr95, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) - ; CHECK-NEXT: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) - ; CHECK-NEXT: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) - ; CHECK-NEXT: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr7 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr8 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, killed renamable $sgpr64_sgpr65_sgpr66_sgpr67, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr9 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47, killed renamable $sgpr68_sgpr69_sgpr70_sgpr71, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr10 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr11 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr12 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, killed renamable $sgpr96_sgpr97_sgpr98_sgpr99, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr13 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) + ; CHECK-NEXT: renamable $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 renamable $vgpr5_vgpr6, renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128)) ; CHECK-NEXT: renamable $sgpr8_sgpr9_sgpr10_sgpr11 = SI_SPILL_S128_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.0, align 4, addrspace 5) ; CHECK-NEXT: renamable $vgpr1_vgpr2_vgpr3_vgpr4 = BUFFER_LOAD_FORMAT_XYZW_IDXEN renamable $vgpr0, renamable $sgpr8_sgpr9_sgpr10_sgpr11, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 7) ; CHECK-NEXT: KILL killed renamable $sgpr4_sgpr5_sgpr6_sgpr7 - ; CHECK-NEXT: KILL killed renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 + ; CHECK-NEXT: KILL killed renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 ; CHECK-NEXT: KILL killed renamable $vgpr5_vgpr6 ; CHECK-NEXT: KILL killed renamable $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: KILL killed renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 + ; CHECK-NEXT: KILL killed renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; CHECK-NEXT: KILL killed renamable $sgpr8_sgpr9_sgpr10_sgpr11 ; CHECK-NEXT: KILL killed renamable $vgpr0 ; CHECK-NEXT: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 killed $vgpr7, killed $vgpr8, implicit $mode, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll index 9afa0e2bb2dcd..f08e5be0fd742 100644 --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -20,16 +20,16 @@ define hidden void @widget() { ; GCN-NEXT: v_writelane_b32 v41, s35, 3 ; GCN-NEXT: v_writelane_b32 v41, s36, 4 ; GCN-NEXT: v_writelane_b32 v41, s37, 5 -; GCN-NEXT: v_writelane_b32 v41, s46, 6 -; GCN-NEXT: v_writelane_b32 v41, s47, 7 +; GCN-NEXT: v_writelane_b32 v41, s38, 6 +; GCN-NEXT: v_writelane_b32 v41, s39, 7 ; GCN-NEXT: v_writelane_b32 v41, s48, 8 ; GCN-NEXT: v_writelane_b32 v41, s49, 9 ; GCN-NEXT: v_writelane_b32 v41, s50, 10 ; GCN-NEXT: v_writelane_b32 v41, s51, 11 ; GCN-NEXT: v_writelane_b32 v41, s52, 12 ; GCN-NEXT: v_writelane_b32 v41, s53, 13 -; GCN-NEXT: v_writelane_b32 v41, s62, 14 -; GCN-NEXT: v_writelane_b32 v41, s63, 15 +; GCN-NEXT: v_writelane_b32 v41, s54, 14 +; GCN-NEXT: v_writelane_b32 v41, s55, 15 ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_load_dword v0, v[0:1] @@ -37,7 +37,7 @@ define hidden void @widget() { ; GCN-NEXT: s_mov_b64 s[16:17], 0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0 -; GCN-NEXT: s_mov_b64 s[62:63], 0 +; GCN-NEXT: s_mov_b64 s[54:55], 0 ; GCN-NEXT: s_mov_b64 s[18:19], 0 ; GCN-NEXT: s_cbranch_vccz .LBB0_9 ; GCN-NEXT: ; %bb.1: ; %Flow @@ -52,7 +52,7 @@ define hidden void @widget() { ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 ; GCN-NEXT: s_mov_b64 s[34:35], s[4:5] ; GCN-NEXT: s_mov_b64 s[36:37], s[6:7] -; GCN-NEXT: s_mov_b64 s[46:47], s[8:9] +; GCN-NEXT: s_mov_b64 s[38:39], s[8:9] ; GCN-NEXT: s_mov_b64 s[48:49], s[10:11] ; GCN-NEXT: s_mov_b32 s50, s12 ; GCN-NEXT: s_mov_b32 s51, s13 @@ -67,15 +67,15 @@ define hidden void @widget() { ; GCN-NEXT: s_mov_b32 s15, s53 ; GCN-NEXT: s_mov_b64 s[4:5], s[34:35] ; GCN-NEXT: s_mov_b64 s[6:7], s[36:37] -; GCN-NEXT: s_mov_b64 s[8:9], s[46:47] +; GCN-NEXT: s_mov_b64 s[8:9], s[38:39] ; GCN-NEXT: s_mov_b64 s[10:11], s[48:49] ; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 ; GCN-NEXT: s_mov_b64 s[16:17], 0 -; GCN-NEXT: s_andn2_b64 s[18:19], s[62:63], exec +; GCN-NEXT: s_andn2_b64 s[18:19], s[54:55], exec ; GCN-NEXT: s_and_b64 s[20:21], vcc, exec -; GCN-NEXT: s_or_b64 s[62:63], s[18:19], s[20:21] +; GCN-NEXT: s_or_b64 s[54:55], s[18:19], s[20:21] ; GCN-NEXT: .LBB0_4: ; %Flow2 -; GCN-NEXT: s_and_saveexec_b64 s[18:19], s[62:63] +; GCN-NEXT: s_and_saveexec_b64 s[18:19], s[54:55] ; GCN-NEXT: s_xor_b64 s[18:19], exec, s[18:19] ; GCN-NEXT: s_cbranch_execz .LBB0_6 ; GCN-NEXT: ; %bb.5: ; %bb12 @@ -93,16 +93,16 @@ define hidden void @widget() { ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: .LBB0_8: ; %UnifiedReturnBlock -; GCN-NEXT: v_readlane_b32 s63, v41, 15 -; GCN-NEXT: v_readlane_b32 s62, v41, 14 +; GCN-NEXT: v_readlane_b32 s55, v41, 15 +; GCN-NEXT: v_readlane_b32 s54, v41, 14 ; GCN-NEXT: v_readlane_b32 s53, v41, 13 ; GCN-NEXT: v_readlane_b32 s52, v41, 12 ; GCN-NEXT: v_readlane_b32 s51, v41, 11 ; GCN-NEXT: v_readlane_b32 s50, v41, 10 ; GCN-NEXT: v_readlane_b32 s49, v41, 9 ; GCN-NEXT: v_readlane_b32 s48, v41, 8 -; GCN-NEXT: v_readlane_b32 s47, v41, 7 -; GCN-NEXT: v_readlane_b32 s46, v41, 6 +; GCN-NEXT: v_readlane_b32 s39, v41, 7 +; GCN-NEXT: v_readlane_b32 s38, v41, 6 ; GCN-NEXT: v_readlane_b32 s37, v41, 5 ; GCN-NEXT: v_readlane_b32 s36, v41, 4 ; GCN-NEXT: v_readlane_b32 s35, v41, 3 @@ -119,7 +119,7 @@ define hidden void @widget() { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .LBB0_9: ; %bb2 -; GCN-NEXT: v_cmp_eq_u32_e64 s[62:63], 21, v0 +; GCN-NEXT: v_cmp_eq_u32_e64 s[54:55], 21, v0 ; GCN-NEXT: v_cmp_ne_u32_e64 s[18:19], 21, v0 ; GCN-NEXT: s_mov_b64 vcc, exec ; GCN-NEXT: s_cbranch_execnz .LBB0_2 @@ -272,34 +272,34 @@ define hidden void @blam() { ; GCN-NEXT: v_writelane_b32 v45, s35, 3 ; GCN-NEXT: v_writelane_b32 v45, s36, 4 ; GCN-NEXT: v_writelane_b32 v45, s37, 5 -; GCN-NEXT: v_writelane_b32 v45, s46, 6 -; GCN-NEXT: v_writelane_b32 v45, s47, 7 +; GCN-NEXT: v_writelane_b32 v45, s38, 6 +; GCN-NEXT: v_writelane_b32 v45, s39, 7 ; GCN-NEXT: v_writelane_b32 v45, s48, 8 ; GCN-NEXT: v_writelane_b32 v45, s49, 9 ; GCN-NEXT: v_writelane_b32 v45, s50, 10 ; GCN-NEXT: v_writelane_b32 v45, s51, 11 ; GCN-NEXT: v_writelane_b32 v45, s52, 12 ; GCN-NEXT: v_writelane_b32 v45, s53, 13 -; GCN-NEXT: v_writelane_b32 v45, s62, 14 -; GCN-NEXT: v_writelane_b32 v45, s63, 15 +; GCN-NEXT: v_writelane_b32 v45, s54, 14 +; GCN-NEXT: v_writelane_b32 v45, s55, 15 ; GCN-NEXT: v_writelane_b32 v45, s64, 16 ; GCN-NEXT: v_writelane_b32 v45, s65, 17 ; GCN-NEXT: v_writelane_b32 v45, s66, 18 ; GCN-NEXT: v_writelane_b32 v45, s67, 19 ; GCN-NEXT: v_writelane_b32 v45, s68, 20 ; GCN-NEXT: v_writelane_b32 v45, s69, 21 -; GCN-NEXT: v_writelane_b32 v45, s78, 22 -; GCN-NEXT: v_writelane_b32 v45, s79, 23 +; GCN-NEXT: v_writelane_b32 v45, s70, 22 +; GCN-NEXT: v_writelane_b32 v45, s71, 23 ; GCN-NEXT: v_writelane_b32 v45, s80, 24 ; GCN-NEXT: v_writelane_b32 v45, s81, 25 ; GCN-NEXT: v_mov_b32_e32 v40, v31 -; GCN-NEXT: s_mov_b32 s62, s15 -; GCN-NEXT: s_mov_b32 s63, s14 +; GCN-NEXT: s_mov_b32 s54, s15 +; GCN-NEXT: s_mov_b32 s55, s14 ; GCN-NEXT: s_mov_b32 s64, s13 ; GCN-NEXT: s_mov_b32 s65, s12 ; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] ; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[46:47], s[6:7] +; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] ; GCN-NEXT: s_mov_b64 s[48:49], s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 @@ -329,7 +329,7 @@ define hidden void @blam() { ; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 2, v0 ; GCN-NEXT: s_mov_b64 s[4:5], -1 ; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_xor_b64 s[78:79], exec, s[8:9] +; GCN-NEXT: s_xor_b64 s[70:71], exec, s[8:9] ; GCN-NEXT: s_cbranch_execz .LBB1_12 ; GCN-NEXT: ; %bb.3: ; %bb6 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 @@ -342,13 +342,13 @@ define hidden void @blam() { ; GCN-NEXT: s_add_u32 s16, s16, spam@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, spam@rel32@hi+12 ; GCN-NEXT: s_mov_b64 s[4:5], s[48:49] -; GCN-NEXT: s_mov_b64 s[6:7], s[46:47] +; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] ; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] ; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] ; GCN-NEXT: s_mov_b32 s12, s65 ; GCN-NEXT: s_mov_b32 s13, s64 -; GCN-NEXT: s_mov_b32 s14, s63 -; GCN-NEXT: s_mov_b32 s15, s62 +; GCN-NEXT: s_mov_b32 s14, s55 +; GCN-NEXT: s_mov_b32 s15, s54 ; GCN-NEXT: v_mov_b32_e32 v31, v40 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: v_cmp_neq_f32_e32 vcc, 0, v0 @@ -394,7 +394,7 @@ define hidden void @blam() { ; GCN-NEXT: ; implicit-def: $vgpr0 ; GCN-NEXT: .LBB1_12: ; %Flow ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 -; GCN-NEXT: s_andn2_saveexec_b64 s[8:9], s[78:79] +; GCN-NEXT: s_andn2_saveexec_b64 s[8:9], s[70:71] ; GCN-NEXT: s_cbranch_execz .LBB1_16 ; GCN-NEXT: ; %bb.13: ; %bb8 ; GCN-NEXT: ; in Loop: Header=BB1_2 Depth=1 @@ -429,24 +429,24 @@ define hidden void @blam() { ; GCN-NEXT: s_or_b64 exec, exec, s[66:67] ; GCN-NEXT: v_readlane_b32 s81, v45, 25 ; GCN-NEXT: v_readlane_b32 s80, v45, 24 -; GCN-NEXT: v_readlane_b32 s79, v45, 23 -; GCN-NEXT: v_readlane_b32 s78, v45, 22 +; GCN-NEXT: v_readlane_b32 s71, v45, 23 +; GCN-NEXT: v_readlane_b32 s70, v45, 22 ; GCN-NEXT: v_readlane_b32 s69, v45, 21 ; GCN-NEXT: v_readlane_b32 s68, v45, 20 ; GCN-NEXT: v_readlane_b32 s67, v45, 19 ; GCN-NEXT: v_readlane_b32 s66, v45, 18 ; GCN-NEXT: v_readlane_b32 s65, v45, 17 ; GCN-NEXT: v_readlane_b32 s64, v45, 16 -; GCN-NEXT: v_readlane_b32 s63, v45, 15 -; GCN-NEXT: v_readlane_b32 s62, v45, 14 +; GCN-NEXT: v_readlane_b32 s55, v45, 15 +; GCN-NEXT: v_readlane_b32 s54, v45, 14 ; GCN-NEXT: v_readlane_b32 s53, v45, 13 ; GCN-NEXT: v_readlane_b32 s52, v45, 12 ; GCN-NEXT: v_readlane_b32 s51, v45, 11 ; GCN-NEXT: v_readlane_b32 s50, v45, 10 ; GCN-NEXT: v_readlane_b32 s49, v45, 9 ; GCN-NEXT: v_readlane_b32 s48, v45, 8 -; GCN-NEXT: v_readlane_b32 s47, v45, 7 -; GCN-NEXT: v_readlane_b32 s46, v45, 6 +; GCN-NEXT: v_readlane_b32 s39, v45, 7 +; GCN-NEXT: v_readlane_b32 s38, v45, 6 ; GCN-NEXT: v_readlane_b32 s37, v45, 5 ; GCN-NEXT: v_readlane_b32 s36, v45, 4 ; GCN-NEXT: v_readlane_b32 s35, v45, 3 diff --git a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir index 670b7d7b8893b..1e815f76ee149 100644 --- a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir +++ b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir @@ -37,12 +37,12 @@ body: | ; MUBUF-LABEL: name: use_restore_frame_reg ; MUBUF: bb.0: ; MUBUF-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; MUBUF-NEXT: liveins: $sgpr38, $sgpr39, $vgpr1 + ; MUBUF-NEXT: liveins: $sgpr40, $sgpr41, $vgpr1 ; MUBUF-NEXT: {{ $}} - ; MUBUF-NEXT: $sgpr38 = frame-setup COPY $sgpr33 + ; MUBUF-NEXT: $sgpr40 = frame-setup COPY $sgpr33 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc ; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc - ; MUBUF-NEXT: $sgpr39 = frame-setup COPY $sgpr34 + ; MUBUF-NEXT: $sgpr41 = frame-setup COPY $sgpr34 ; MUBUF-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 11010048, implicit-def dead $scc ; MUBUF-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc @@ -57,54 +57,54 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $sgpr38, $sgpr39 + ; MUBUF-NEXT: liveins: $sgpr40, $sgpr41 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.2: - ; MUBUF-NEXT: liveins: $sgpr38, $sgpr39 + ; MUBUF-NEXT: liveins: $sgpr40, $sgpr41 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr39 - ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr38 + ; MUBUF-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; MUBUF-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; MUBUF-NEXT: S_ENDPGM 0 ; ; FLATSCR-LABEL: name: use_restore_frame_reg ; FLATSCR: bb.0: ; FLATSCR-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) - ; FLATSCR-NEXT: liveins: $sgpr38, $sgpr39, $vgpr1 + ; FLATSCR-NEXT: liveins: $sgpr40, $sgpr41, $vgpr1 ; FLATSCR-NEXT: {{ $}} - ; FLATSCR-NEXT: $sgpr38 = frame-setup COPY $sgpr33 + ; FLATSCR-NEXT: $sgpr40 = frame-setup COPY $sgpr33 ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc ; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc - ; FLATSCR-NEXT: $sgpr39 = frame-setup COPY $sgpr34 + ; FLATSCR-NEXT: $sgpr41 = frame-setup COPY $sgpr34 ; FLATSCR-NEXT: $sgpr34 = frame-setup COPY $sgpr32 ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 172032, implicit-def dead $scc ; FLATSCR-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc ; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc - ; FLATSCR-NEXT: $sgpr40 = S_ADDC_U32 $sgpr33, 8192, implicit-def $scc, implicit $scc - ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr40, 0, implicit-def $scc - ; FLATSCR-NEXT: $sgpr40 = S_BITSET0_B32 0, $sgpr40 - ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr40, implicit $exec - ; FLATSCR-NEXT: $sgpr40 = S_ADDC_U32 $sgpr33, 155648, implicit-def $scc, implicit $scc - ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr40, 0, implicit-def $scc - ; FLATSCR-NEXT: $sgpr40 = S_BITSET0_B32 0, $sgpr40 - ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr40, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 + ; FLATSCR-NEXT: $sgpr42 = S_ADDC_U32 $sgpr33, 8192, implicit-def $scc, implicit $scc + ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr42, 0, implicit-def $scc + ; FLATSCR-NEXT: $sgpr42 = S_BITSET0_B32 0, $sgpr42 + ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr42, implicit $exec + ; FLATSCR-NEXT: $sgpr42 = S_ADDC_U32 $sgpr33, 155648, implicit-def $scc, implicit $scc + ; FLATSCR-NEXT: S_BITCMP1_B32 $sgpr42, 0, implicit-def $scc + ; FLATSCR-NEXT: $sgpr42 = S_BITSET0_B32 0, $sgpr42 + ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $sgpr42, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31 ; FLATSCR-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc ; FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; FLATSCR-NEXT: {{ $}} ; FLATSCR-NEXT: bb.1: ; FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; FLATSCR-NEXT: liveins: $sgpr38, $sgpr39 + ; FLATSCR-NEXT: liveins: $sgpr40, $sgpr41 ; FLATSCR-NEXT: {{ $}} ; FLATSCR-NEXT: S_NOP 0 ; FLATSCR-NEXT: {{ $}} ; FLATSCR-NEXT: bb.2: - ; FLATSCR-NEXT: liveins: $sgpr38, $sgpr39 + ; FLATSCR-NEXT: liveins: $sgpr40, $sgpr41 ; FLATSCR-NEXT: {{ $}} ; FLATSCR-NEXT: $sgpr32 = frame-destroy COPY $sgpr34 - ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr39 - ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr38 + ; FLATSCR-NEXT: $sgpr34 = frame-destroy COPY $sgpr41 + ; FLATSCR-NEXT: $sgpr33 = frame-destroy COPY $sgpr40 ; FLATSCR-NEXT: S_ENDPGM 0 bb.0: liveins: $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll b/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll index d0798b261abf0..2ee62d13fcc51 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll @@ -33,16 +33,16 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX900-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX900-NEXT: v_writelane_b32 v63, s36, 0 ; GFX900-NEXT: v_writelane_b32 v63, s37, 1 -; GFX900-NEXT: v_writelane_b32 v63, s46, 2 -; GFX900-NEXT: v_writelane_b32 v63, s47, 3 +; GFX900-NEXT: v_writelane_b32 v63, s38, 2 +; GFX900-NEXT: v_writelane_b32 v63, s39, 3 ; GFX900-NEXT: v_writelane_b32 v63, s48, 4 ; GFX900-NEXT: v_writelane_b32 v63, s49, 5 ; GFX900-NEXT: v_writelane_b32 v63, s50, 6 ; GFX900-NEXT: v_writelane_b32 v63, s51, 7 ; GFX900-NEXT: v_writelane_b32 v63, s52, 8 ; GFX900-NEXT: v_writelane_b32 v63, s53, 9 -; GFX900-NEXT: v_writelane_b32 v63, s62, 10 -; GFX900-NEXT: v_writelane_b32 v63, s63, 11 +; GFX900-NEXT: v_writelane_b32 v63, s54, 10 +; GFX900-NEXT: v_writelane_b32 v63, s55, 11 ; GFX900-NEXT: v_writelane_b32 v63, s64, 12 ; GFX900-NEXT: v_writelane_b32 v63, s65, 13 ; GFX900-NEXT: v_writelane_b32 v63, s66, 14 @@ -148,16 +148,16 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX900-NEXT: v_readlane_b32 s66, v63, 14 ; GFX900-NEXT: v_readlane_b32 s65, v63, 13 ; GFX900-NEXT: v_readlane_b32 s64, v63, 12 -; GFX900-NEXT: v_readlane_b32 s63, v63, 11 -; GFX900-NEXT: v_readlane_b32 s62, v63, 10 +; GFX900-NEXT: v_readlane_b32 s55, v63, 11 +; GFX900-NEXT: v_readlane_b32 s54, v63, 10 ; GFX900-NEXT: v_readlane_b32 s53, v63, 9 ; GFX900-NEXT: v_readlane_b32 s52, v63, 8 ; GFX900-NEXT: v_readlane_b32 s51, v63, 7 ; GFX900-NEXT: v_readlane_b32 s50, v63, 6 ; GFX900-NEXT: v_readlane_b32 s49, v63, 5 ; GFX900-NEXT: v_readlane_b32 s48, v63, 4 -; GFX900-NEXT: v_readlane_b32 s47, v63, 3 -; GFX900-NEXT: v_readlane_b32 s46, v63, 2 +; GFX900-NEXT: v_readlane_b32 s39, v63, 3 +; GFX900-NEXT: v_readlane_b32 s38, v63, 2 ; GFX900-NEXT: v_readlane_b32 s37, v63, 1 ; GFX900-NEXT: v_readlane_b32 s36, v63, 0 ; GFX900-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -204,16 +204,16 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX906-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX906-NEXT: v_writelane_b32 v63, s36, 0 ; GFX906-NEXT: v_writelane_b32 v63, s37, 1 -; GFX906-NEXT: v_writelane_b32 v63, s46, 2 -; GFX906-NEXT: v_writelane_b32 v63, s47, 3 +; GFX906-NEXT: v_writelane_b32 v63, s38, 2 +; GFX906-NEXT: v_writelane_b32 v63, s39, 3 ; GFX906-NEXT: v_writelane_b32 v63, s48, 4 ; GFX906-NEXT: v_writelane_b32 v63, s49, 5 ; GFX906-NEXT: v_writelane_b32 v63, s50, 6 ; GFX906-NEXT: v_writelane_b32 v63, s51, 7 ; GFX906-NEXT: v_writelane_b32 v63, s52, 8 ; GFX906-NEXT: v_writelane_b32 v63, s53, 9 -; GFX906-NEXT: v_writelane_b32 v63, s62, 10 -; GFX906-NEXT: v_writelane_b32 v63, s63, 11 +; GFX906-NEXT: v_writelane_b32 v63, s54, 10 +; GFX906-NEXT: v_writelane_b32 v63, s55, 11 ; GFX906-NEXT: v_writelane_b32 v63, s64, 12 ; GFX906-NEXT: v_writelane_b32 v63, s65, 13 ; GFX906-NEXT: v_writelane_b32 v63, s66, 14 @@ -319,16 +319,16 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX906-NEXT: v_readlane_b32 s66, v63, 14 ; GFX906-NEXT: v_readlane_b32 s65, v63, 13 ; GFX906-NEXT: v_readlane_b32 s64, v63, 12 -; GFX906-NEXT: v_readlane_b32 s63, v63, 11 -; GFX906-NEXT: v_readlane_b32 s62, v63, 10 +; GFX906-NEXT: v_readlane_b32 s55, v63, 11 +; GFX906-NEXT: v_readlane_b32 s54, v63, 10 ; GFX906-NEXT: v_readlane_b32 s53, v63, 9 ; GFX906-NEXT: v_readlane_b32 s52, v63, 8 ; GFX906-NEXT: v_readlane_b32 s51, v63, 7 ; GFX906-NEXT: v_readlane_b32 s50, v63, 6 ; GFX906-NEXT: v_readlane_b32 s49, v63, 5 ; GFX906-NEXT: v_readlane_b32 s48, v63, 4 -; GFX906-NEXT: v_readlane_b32 s47, v63, 3 -; GFX906-NEXT: v_readlane_b32 s46, v63, 2 +; GFX906-NEXT: v_readlane_b32 s39, v63, 3 +; GFX906-NEXT: v_readlane_b32 s38, v63, 2 ; GFX906-NEXT: v_readlane_b32 s37, v63, 1 ; GFX906-NEXT: v_readlane_b32 s36, v63, 0 ; GFX906-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -374,16 +374,16 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX908-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse ; GFX908-NEXT: v_writelane_b32 v62, s36, 0 ; GFX908-NEXT: v_writelane_b32 v62, s37, 1 -; GFX908-NEXT: v_writelane_b32 v62, s46, 2 -; GFX908-NEXT: v_writelane_b32 v62, s47, 3 +; GFX908-NEXT: v_writelane_b32 v62, s38, 2 +; GFX908-NEXT: v_writelane_b32 v62, s39, 3 ; GFX908-NEXT: v_writelane_b32 v62, s48, 4 ; GFX908-NEXT: v_writelane_b32 v62, s49, 5 ; GFX908-NEXT: v_writelane_b32 v62, s50, 6 ; GFX908-NEXT: v_writelane_b32 v62, s51, 7 ; GFX908-NEXT: v_writelane_b32 v62, s52, 8 ; GFX908-NEXT: v_writelane_b32 v62, s53, 9 -; GFX908-NEXT: v_writelane_b32 v62, s62, 10 -; GFX908-NEXT: v_writelane_b32 v62, s63, 11 +; GFX908-NEXT: v_writelane_b32 v62, s54, 10 +; GFX908-NEXT: v_writelane_b32 v62, s55, 11 ; GFX908-NEXT: v_writelane_b32 v62, s64, 12 ; GFX908-NEXT: v_writelane_b32 v62, s65, 13 ; GFX908-NEXT: v_writelane_b32 v62, s66, 14 @@ -493,16 +493,16 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX908-NEXT: v_readlane_b32 s66, v62, 14 ; GFX908-NEXT: v_readlane_b32 s65, v62, 13 ; GFX908-NEXT: v_readlane_b32 s64, v62, 12 -; GFX908-NEXT: v_readlane_b32 s63, v62, 11 -; GFX908-NEXT: v_readlane_b32 s62, v62, 10 +; GFX908-NEXT: v_readlane_b32 s55, v62, 11 +; GFX908-NEXT: v_readlane_b32 s54, v62, 10 ; GFX908-NEXT: v_readlane_b32 s53, v62, 9 ; GFX908-NEXT: v_readlane_b32 s52, v62, 8 ; GFX908-NEXT: v_readlane_b32 s51, v62, 7 ; GFX908-NEXT: v_readlane_b32 s50, v62, 6 ; GFX908-NEXT: v_readlane_b32 s49, v62, 5 ; GFX908-NEXT: v_readlane_b32 s48, v62, 4 -; GFX908-NEXT: v_readlane_b32 s47, v62, 3 -; GFX908-NEXT: v_readlane_b32 s46, v62, 2 +; GFX908-NEXT: v_readlane_b32 s39, v62, 3 +; GFX908-NEXT: v_readlane_b32 s38, v62, 2 ; GFX908-NEXT: v_readlane_b32 s37, v62, 1 ; GFX908-NEXT: v_readlane_b32 s36, v62, 0 ; GFX908-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse @@ -548,16 +548,16 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX90a-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse ; GFX90a-NEXT: v_writelane_b32 v63, s36, 0 ; GFX90a-NEXT: v_writelane_b32 v63, s37, 1 -; GFX90a-NEXT: v_writelane_b32 v63, s46, 2 -; GFX90a-NEXT: v_writelane_b32 v63, s47, 3 +; GFX90a-NEXT: v_writelane_b32 v63, s38, 2 +; GFX90a-NEXT: v_writelane_b32 v63, s39, 3 ; GFX90a-NEXT: v_writelane_b32 v63, s48, 4 ; GFX90a-NEXT: v_writelane_b32 v63, s49, 5 ; GFX90a-NEXT: v_writelane_b32 v63, s50, 6 ; GFX90a-NEXT: v_writelane_b32 v63, s51, 7 ; GFX90a-NEXT: v_writelane_b32 v63, s52, 8 ; GFX90a-NEXT: v_writelane_b32 v63, s53, 9 -; GFX90a-NEXT: v_writelane_b32 v63, s62, 10 -; GFX90a-NEXT: v_writelane_b32 v63, s63, 11 +; GFX90a-NEXT: v_writelane_b32 v63, s54, 10 +; GFX90a-NEXT: v_writelane_b32 v63, s55, 11 ; GFX90a-NEXT: v_writelane_b32 v63, s64, 12 ; GFX90a-NEXT: v_writelane_b32 v63, s65, 13 ; GFX90a-NEXT: v_writelane_b32 v63, s66, 14 @@ -663,16 +663,16 @@ define i32 @test_tuple(<16 x i64> %0) { ; GFX90a-NEXT: v_readlane_b32 s66, v63, 14 ; GFX90a-NEXT: v_readlane_b32 s65, v63, 13 ; GFX90a-NEXT: v_readlane_b32 s64, v63, 12 -; GFX90a-NEXT: v_readlane_b32 s63, v63, 11 -; GFX90a-NEXT: v_readlane_b32 s62, v63, 10 +; GFX90a-NEXT: v_readlane_b32 s55, v63, 11 +; GFX90a-NEXT: v_readlane_b32 s54, v63, 10 ; GFX90a-NEXT: v_readlane_b32 s53, v63, 9 ; GFX90a-NEXT: v_readlane_b32 s52, v63, 8 ; GFX90a-NEXT: v_readlane_b32 s51, v63, 7 ; GFX90a-NEXT: v_readlane_b32 s50, v63, 6 ; GFX90a-NEXT: v_readlane_b32 s49, v63, 5 ; GFX90a-NEXT: v_readlane_b32 s48, v63, 4 -; GFX90a-NEXT: v_readlane_b32 s47, v63, 3 -; GFX90a-NEXT: v_readlane_b32 s46, v63, 2 +; GFX90a-NEXT: v_readlane_b32 s39, v63, 3 +; GFX90a-NEXT: v_readlane_b32 s38, v63, 2 ; GFX90a-NEXT: v_readlane_b32 s37, v63, 1 ; GFX90a-NEXT: v_readlane_b32 s36, v63, 0 ; GFX90a-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse From 1bde981f60a8014728012b4b19dd73072a41bd48 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Sat, 8 Mar 2025 00:14:23 -0500 Subject: [PATCH 3/3] rebase and fix conflicts --- .../CodeGen/AMDGPU/call-argument-types.ll | 63 +-- .../identical-subrange-spill-infloop.ll | 406 ++++++----------- llvm/test/CodeGen/AMDGPU/issue48473.mir | 2 +- .../AMDGPU/llvm.amdgcn.readfirstlane.ll | 416 ++++++------------ .../AMDGPU/tuple-allocation-failure.ll | 250 +++++------ 5 files changed, 427 insertions(+), 710 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll index 2365c68a7cb0b..3451e389fef8b 100644 --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -1147,12 +1147,11 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b32 s0, 0 ; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b64 s[6:7], s[0:1] +; VI-NEXT: s_mov_b64 s[0:1], 0 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s1, s0 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1170,12 +1169,11 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b32 s0, 0 ; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b64 s[6:7], s[0:1] +; CI-NEXT: s_mov_b64 s[0:1], 0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_mov_b32 s1, s0 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1193,12 +1191,11 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GFX9-NEXT: s_mov_b32 s38, -1 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] +; GFX9-NEXT: s_mov_b64 s[0:1], 0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1212,10 +1209,9 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; ; GFX11-LABEL: test_call_external_void_func_v2i64: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b64 s[4:5], 0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 -; GFX11-NEXT: s_mov_b32 s5, s4 ; GFX11-NEXT: s_getpc_b64 s[2:3] ; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i64@rel32@lo+4 ; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i64@rel32@hi+12 @@ -1229,11 +1225,10 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 -; HSA-NEXT: s_mov_b32 s8, 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 +; HSA-NEXT: s_mov_b64 s[8:9], 0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 -; HSA-NEXT: s_mov_b32 s9, s8 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 @@ -1357,12 +1352,11 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b32 s0, 0 ; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b64 s[6:7], s[0:1] +; VI-NEXT: s_mov_b64 s[0:1], 0 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s1, s0 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1382,12 +1376,11 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b32 s0, 0 ; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b64 s[6:7], s[0:1] +; CI-NEXT: s_mov_b64 s[0:1], 0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_mov_b32 s1, s0 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1407,12 +1400,11 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GFX9-NEXT: s_mov_b32 s38, -1 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] +; GFX9-NEXT: s_mov_b64 s[0:1], 0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1428,10 +1420,9 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; ; GFX11-LABEL: test_call_external_void_func_v3i64: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b64 s[4:5], 0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 -; GFX11-NEXT: s_mov_b32 s5, s4 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 ; GFX11-NEXT: s_getpc_b64 s[2:3] @@ -1446,11 +1437,10 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 -; HSA-NEXT: s_mov_b32 s8, 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 +; HSA-NEXT: s_mov_b64 s[8:9], 0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 -; HSA-NEXT: s_mov_b32 s9, s8 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 @@ -1477,12 +1467,11 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; VI-NEXT: s_mov_b32 s38, -1 ; VI-NEXT: s_mov_b32 s39, 0xe80000 -; VI-NEXT: s_mov_b64 s[6:7], s[0:1] -; VI-NEXT: s_mov_b32 s0, 0 ; VI-NEXT: s_add_u32 s36, s36, s3 +; VI-NEXT: s_mov_b64 s[6:7], s[0:1] +; VI-NEXT: s_mov_b64 s[0:1], 0 ; VI-NEXT: s_mov_b32 s3, 0xf000 ; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s1, s0 ; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_addc_u32 s37, s37, 0 ; VI-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1504,12 +1493,11 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; CI-NEXT: s_mov_b32 s38, -1 ; CI-NEXT: s_mov_b32 s39, 0xe8f000 -; CI-NEXT: s_mov_b64 s[6:7], s[0:1] -; CI-NEXT: s_mov_b32 s0, 0 ; CI-NEXT: s_add_u32 s36, s36, s3 +; CI-NEXT: s_mov_b64 s[6:7], s[0:1] +; CI-NEXT: s_mov_b64 s[0:1], 0 ; CI-NEXT: s_mov_b32 s3, 0xf000 ; CI-NEXT: s_mov_b32 s2, -1 -; CI-NEXT: s_mov_b32 s1, s0 ; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; CI-NEXT: s_addc_u32 s37, s37, 0 ; CI-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1531,12 +1519,11 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 ; GFX9-NEXT: s_mov_b32 s38, -1 ; GFX9-NEXT: s_mov_b32 s39, 0xe00000 -; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] -; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_add_u32 s36, s36, s3 +; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] +; GFX9-NEXT: s_mov_b64 s[0:1], 0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 -; GFX9-NEXT: s_mov_b32 s1, s0 ; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 ; GFX9-NEXT: s_addc_u32 s37, s37, 0 ; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -1554,10 +1541,9 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; ; GFX11-LABEL: test_call_external_void_func_v4i64: ; GFX11: ; %bb.0: -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b64 s[4:5], 0 ; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; GFX11-NEXT: s_mov_b32 s6, -1 -; GFX11-NEXT: s_mov_b32 s5, s4 ; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2 ; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 ; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4 @@ -1573,11 +1559,10 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; HSA: ; %bb.0: ; HSA-NEXT: s_add_i32 s6, s6, s9 ; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 -; HSA-NEXT: s_mov_b32 s8, 0 ; HSA-NEXT: s_add_u32 s0, s0, s9 +; HSA-NEXT: s_mov_b64 s[8:9], 0 ; HSA-NEXT: s_mov_b32 s11, 0x1100f000 ; HSA-NEXT: s_mov_b32 s10, -1 -; HSA-NEXT: s_mov_b32 s9, s8 ; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 ; HSA-NEXT: s_addc_u32 s1, s1, 0 ; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll index 53c4a9cd229aa..8dbd6c5d133ea 100644 --- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll +++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll @@ -8,140 +8,98 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: v_writelane_b32 v5, s36, 0 -; CHECK-NEXT: v_writelane_b32 v5, s37, 1 -; CHECK-NEXT: v_writelane_b32 v5, s38, 2 -; CHECK-NEXT: v_writelane_b32 v5, s39, 3 -; CHECK-NEXT: v_writelane_b32 v5, s48, 4 -; CHECK-NEXT: v_writelane_b32 v5, s49, 5 -; CHECK-NEXT: v_writelane_b32 v5, s50, 6 -; CHECK-NEXT: v_writelane_b32 v5, s51, 7 -; CHECK-NEXT: v_writelane_b32 v5, s52, 8 -; CHECK-NEXT: v_writelane_b32 v5, s53, 9 +; CHECK-NEXT: v_writelane_b32 v5, s30, 0 +; CHECK-NEXT: v_writelane_b32 v5, s31, 1 +; CHECK-NEXT: v_writelane_b32 v5, s34, 2 +; CHECK-NEXT: v_writelane_b32 v5, s35, 3 +; CHECK-NEXT: v_writelane_b32 v5, s36, 4 +; CHECK-NEXT: v_writelane_b32 v5, s37, 5 +; CHECK-NEXT: v_writelane_b32 v5, s38, 6 ; CHECK-NEXT: s_getpc_b64 s[24:25] -; CHECK-NEXT: v_writelane_b32 v5, s54, 10 -; CHECK-NEXT: s_movk_i32 s4, 0xf0 -; CHECK-NEXT: s_mov_b32 s5, s24 -; CHECK-NEXT: v_writelane_b32 v5, s55, 11 -; CHECK-NEXT: s_load_dwordx16 s[44:59], s[4:5], 0x0 -; CHECK-NEXT: ; implicit-def: $vgpr7 : SGPR spill to VGPR lane -; CHECK-NEXT: s_mov_b64 s[4:5], 0 -; CHECK-NEXT: s_load_dwordx4 s[40:43], s[4:5], 0x0 -; CHECK-NEXT: s_movk_i32 s20, 0x130 +; CHECK-NEXT: v_writelane_b32 v5, s39, 7 +; CHECK-NEXT: s_movk_i32 s20, 0xf0 ; CHECK-NEXT: s_mov_b32 s21, s24 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_writelane_b32 v7, s44, 0 -; CHECK-NEXT: v_writelane_b32 v7, s45, 1 -; CHECK-NEXT: v_writelane_b32 v7, s46, 2 -; CHECK-NEXT: v_writelane_b32 v7, s47, 3 -; CHECK-NEXT: v_writelane_b32 v7, s48, 4 -; CHECK-NEXT: v_writelane_b32 v7, s49, 5 -; CHECK-NEXT: v_writelane_b32 v7, s50, 6 -; CHECK-NEXT: v_writelane_b32 v7, s51, 7 -; CHECK-NEXT: v_writelane_b32 v7, s52, 8 -; CHECK-NEXT: v_writelane_b32 v7, s53, 9 -; CHECK-NEXT: v_writelane_b32 v7, s54, 10 +; CHECK-NEXT: v_writelane_b32 v5, s48, 8 ; CHECK-NEXT: s_load_dwordx16 s[4:19], s[20:21], 0x0 -; CHECK-NEXT: v_writelane_b32 v7, s55, 11 -; CHECK-NEXT: v_writelane_b32 v7, s56, 12 -; CHECK-NEXT: s_mov_b32 s20, 0 +; CHECK-NEXT: s_mov_b64 s[20:21], 0 +; CHECK-NEXT: v_writelane_b32 v5, s49, 9 +; CHECK-NEXT: s_load_dwordx4 s[20:23], s[20:21], 0x0 +; CHECK-NEXT: v_writelane_b32 v5, s50, 10 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s22, 0x130 +; CHECK-NEXT: s_mov_b32 s23, s24 +; CHECK-NEXT: v_writelane_b32 v5, s51, 11 +; CHECK-NEXT: s_load_dwordx16 s[36:51], s[22:23], 0x0 +; CHECK-NEXT: s_mov_b32 s28, 0 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 -; CHECK-NEXT: v_writelane_b32 v7, s57, 13 -; CHECK-NEXT: v_mov_b32_e32 v2, s40 +; CHECK-NEXT: v_mov_b32_e32 v2, s20 ; CHECK-NEXT: v_mov_b32_e32 v3, v1 -; CHECK-NEXT: s_mov_b32 s21, s20 -; CHECK-NEXT: s_mov_b32 s22, s20 -; CHECK-NEXT: s_mov_b32 s23, s20 -; CHECK-NEXT: v_writelane_b32 v7, s58, 14 -; CHECK-NEXT: v_writelane_b32 v7, s59, 15 -; CHECK-NEXT: image_sample_lz v3, v[2:3], s[52:59], s[20:23] dmask:0x1 +; CHECK-NEXT: s_mov_b32 s29, s28 +; CHECK-NEXT: s_mov_b32 s30, s28 +; CHECK-NEXT: s_mov_b32 s31, s28 +; CHECK-NEXT: image_sample_lz v3, v[2:3], s[12:19], s[28:31] dmask:0x1 ; CHECK-NEXT: v_mov_b32_e32 v2, v1 +; CHECK-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane +; CHECK-NEXT: v_writelane_b32 v5, s52, 12 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_writelane_b32 v7, s4, 16 -; CHECK-NEXT: v_writelane_b32 v7, s5, 17 -; CHECK-NEXT: v_writelane_b32 v7, s6, 18 -; CHECK-NEXT: v_writelane_b32 v7, s7, 19 -; CHECK-NEXT: v_writelane_b32 v7, s8, 20 -; CHECK-NEXT: v_writelane_b32 v7, s9, 21 -; CHECK-NEXT: image_sample_lz v4, v[1:2], s[4:11], s[20:23] dmask:0x1 -; CHECK-NEXT: v_writelane_b32 v7, s10, 22 -; CHECK-NEXT: v_writelane_b32 v7, s11, 23 -; CHECK-NEXT: v_writelane_b32 v7, s12, 24 -; CHECK-NEXT: v_writelane_b32 v7, s13, 25 -; CHECK-NEXT: v_writelane_b32 v7, s14, 26 -; CHECK-NEXT: v_writelane_b32 v7, s15, 27 -; CHECK-NEXT: v_writelane_b32 v7, s16, 28 -; CHECK-NEXT: v_writelane_b32 v7, s17, 29 -; CHECK-NEXT: v_writelane_b32 v7, s18, 30 -; CHECK-NEXT: s_mov_b32 s26, 48 -; CHECK-NEXT: s_mov_b32 s27, s24 -; CHECK-NEXT: v_writelane_b32 v7, s19, 31 -; CHECK-NEXT: s_load_dwordx8 s[4:11], s[26:27], 0x0 -; CHECK-NEXT: v_writelane_b32 v5, s64, 12 -; CHECK-NEXT: v_writelane_b32 v5, s65, 13 -; CHECK-NEXT: v_writelane_b32 v5, s66, 14 -; CHECK-NEXT: s_movk_i32 s28, 0x1f0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_writelane_b32 v7, s4, 32 -; CHECK-NEXT: v_writelane_b32 v7, s5, 33 -; CHECK-NEXT: v_writelane_b32 v7, s6, 34 -; CHECK-NEXT: v_writelane_b32 v7, s7, 35 -; CHECK-NEXT: v_writelane_b32 v7, s8, 36 -; CHECK-NEXT: v_writelane_b32 v7, s9, 37 -; CHECK-NEXT: s_movk_i32 s72, 0x2f0 -; CHECK-NEXT: s_mov_b32 s29, s24 -; CHECK-NEXT: s_mov_b32 s73, s24 -; CHECK-NEXT: v_writelane_b32 v7, s10, 38 -; CHECK-NEXT: v_writelane_b32 v5, s67, 15 -; CHECK-NEXT: v_writelane_b32 v7, s11, 39 -; CHECK-NEXT: s_load_dwordx16 s[52:67], s[28:29], 0x0 -; CHECK-NEXT: s_load_dwordx16 s[4:19], s[72:73], 0x0 +; CHECK-NEXT: v_writelane_b32 v6, s36, 0 +; CHECK-NEXT: v_writelane_b32 v6, s37, 1 +; CHECK-NEXT: v_writelane_b32 v6, s38, 2 +; CHECK-NEXT: v_writelane_b32 v6, s39, 3 +; CHECK-NEXT: v_writelane_b32 v6, s40, 4 +; CHECK-NEXT: v_writelane_b32 v6, s41, 5 +; CHECK-NEXT: image_sample_lz v4, v[1:2], s[36:43], s[28:31] dmask:0x1 +; CHECK-NEXT: v_writelane_b32 v6, s42, 6 +; CHECK-NEXT: v_writelane_b32 v6, s43, 7 +; CHECK-NEXT: v_writelane_b32 v6, s44, 8 +; CHECK-NEXT: v_writelane_b32 v6, s45, 9 +; CHECK-NEXT: v_writelane_b32 v5, s53, 13 +; CHECK-NEXT: v_writelane_b32 v6, s46, 10 +; CHECK-NEXT: v_writelane_b32 v5, s54, 14 +; CHECK-NEXT: v_writelane_b32 v6, s47, 11 +; CHECK-NEXT: v_writelane_b32 v5, s55, 15 +; CHECK-NEXT: v_writelane_b32 v6, s48, 12 +; CHECK-NEXT: v_writelane_b32 v5, s64, 16 +; CHECK-NEXT: v_writelane_b32 v6, s49, 13 +; CHECK-NEXT: v_writelane_b32 v5, s65, 17 +; CHECK-NEXT: v_writelane_b32 v6, s50, 14 +; CHECK-NEXT: v_writelane_b32 v5, s66, 18 +; CHECK-NEXT: v_writelane_b32 v6, s51, 15 +; CHECK-NEXT: s_mov_b32 s40, 48 +; CHECK-NEXT: s_movk_i32 s56, 0x1f0 +; CHECK-NEXT: s_movk_i32 s34, 0x2f0 +; CHECK-NEXT: s_mov_b32 s41, s24 +; CHECK-NEXT: s_mov_b32 s57, s24 +; CHECK-NEXT: s_mov_b32 s35, s24 +; CHECK-NEXT: v_writelane_b32 v5, s67, 19 +; CHECK-NEXT: s_load_dwordx8 s[20:27], s[40:41], 0x0 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: s_load_dwordx16 s[36:51], s[56:57], 0x0 ; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: s_load_dwordx16 s[52:67], s[34:35], 0x0 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; CHECK-NEXT: s_xor_b64 s[24:25], vcc, -1 +; CHECK-NEXT: v_writelane_b32 v5, s68, 20 +; CHECK-NEXT: s_xor_b64 s[72:73], vcc, -1 +; CHECK-NEXT: v_writelane_b32 v5, s69, 21 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_mul_f32_e32 v0, v4, v3 -; CHECK-NEXT: s_and_saveexec_b64 s[26:27], s[24:25] -; CHECK-NEXT: s_xor_b64 s[26:27], exec, s[26:27] +; CHECK-NEXT: s_and_saveexec_b64 vcc, s[72:73] +; CHECK-NEXT: s_xor_b64 s[34:35], exec, vcc ; CHECK-NEXT: s_cbranch_execz .LBB0_3 ; CHECK-NEXT: ; %bb.1: ; %bb48 -; CHECK-NEXT: v_readlane_b32 s36, v7, 0 -; CHECK-NEXT: v_readlane_b32 s44, v7, 8 -; CHECK-NEXT: v_readlane_b32 s45, v7, 9 -; CHECK-NEXT: v_readlane_b32 s46, v7, 10 -; CHECK-NEXT: v_readlane_b32 s47, v7, 11 -; CHECK-NEXT: v_readlane_b32 s48, v7, 12 -; CHECK-NEXT: v_readlane_b32 s49, v7, 13 -; CHECK-NEXT: v_readlane_b32 s50, v7, 14 -; CHECK-NEXT: v_readlane_b32 s51, v7, 15 -; CHECK-NEXT: s_and_b64 vcc, exec, -1 -; CHECK-NEXT: v_readlane_b32 s37, v7, 1 -; CHECK-NEXT: v_readlane_b32 s38, v7, 2 -; CHECK-NEXT: v_readlane_b32 s39, v7, 3 -; CHECK-NEXT: v_readlane_b32 s40, v7, 4 -; CHECK-NEXT: image_sample_lz v3, v[1:2], s[44:51], s[20:23] dmask:0x1 +; CHECK-NEXT: image_sample_lz v3, v[1:2], s[12:19], s[28:31] dmask:0x1 ; CHECK-NEXT: v_mov_b32_e32 v2, 0 -; CHECK-NEXT: v_readlane_b32 s41, v7, 5 -; CHECK-NEXT: v_readlane_b32 s42, v7, 6 -; CHECK-NEXT: v_readlane_b32 s43, v7, 7 +; CHECK-NEXT: s_and_b64 vcc, exec, -1 ; CHECK-NEXT: .LBB0_2: ; %bb50 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: v_readlane_b32 s40, v7, 32 -; CHECK-NEXT: v_readlane_b32 s44, v7, 36 -; CHECK-NEXT: v_readlane_b32 s45, v7, 37 -; CHECK-NEXT: v_readlane_b32 s46, v7, 38 -; CHECK-NEXT: v_readlane_b32 s47, v7, 39 -; CHECK-NEXT: s_mov_b32 s21, s20 -; CHECK-NEXT: s_mov_b32 s22, s20 -; CHECK-NEXT: s_mov_b32 s23, s20 -; CHECK-NEXT: v_readlane_b32 s41, v7, 33 -; CHECK-NEXT: v_readlane_b32 s42, v7, 34 +; CHECK-NEXT: s_mov_b32 s29, s28 +; CHECK-NEXT: s_mov_b32 s30, s28 +; CHECK-NEXT: s_mov_b32 s31, s28 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: image_sample_lz v4, v[1:2], s[60:67], s[44:47] dmask:0x1 -; CHECK-NEXT: v_readlane_b32 s43, v7, 35 -; CHECK-NEXT: image_sample_lz v1, v[1:2], s[12:19], s[20:23] dmask:0x1 +; CHECK-NEXT: image_sample_lz v4, v[1:2], s[44:51], s[24:27] dmask:0x1 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: image_sample_lz v1, v[1:2], s[60:67], s[28:31] dmask:0x1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_sub_f32_e32 v1, v1, v4 ; CHECK-NEXT: v_mul_f32_e32 v1, v1, v0 @@ -149,159 +107,69 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: s_mov_b64 vcc, vcc ; CHECK-NEXT: s_cbranch_vccnz .LBB0_2 ; CHECK-NEXT: .LBB0_3: ; %Flow14 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_readlane_b32 s12, v7, 32 -; CHECK-NEXT: v_readlane_b32 s13, v7, 33 -; CHECK-NEXT: v_readlane_b32 s14, v7, 34 -; CHECK-NEXT: v_readlane_b32 s15, v7, 35 -; CHECK-NEXT: v_readlane_b32 s16, v7, 36 -; CHECK-NEXT: v_readlane_b32 s17, v7, 37 -; CHECK-NEXT: v_readlane_b32 s18, v7, 38 -; CHECK-NEXT: v_readlane_b32 s19, v7, 39 -; CHECK-NEXT: v_writelane_b32 v7, s4, 40 -; CHECK-NEXT: v_writelane_b32 v7, s5, 41 -; CHECK-NEXT: v_writelane_b32 v7, s6, 42 -; CHECK-NEXT: v_writelane_b32 v7, s7, 43 -; CHECK-NEXT: v_writelane_b32 v7, s8, 44 -; CHECK-NEXT: v_writelane_b32 v7, s9, 45 -; CHECK-NEXT: v_writelane_b32 v7, s10, 46 -; CHECK-NEXT: v_writelane_b32 v7, s11, 47 -; CHECK-NEXT: v_writelane_b32 v7, s12, 48 -; CHECK-NEXT: v_writelane_b32 v7, s13, 49 -; CHECK-NEXT: v_writelane_b32 v7, s14, 50 -; CHECK-NEXT: v_writelane_b32 v7, s15, 51 -; CHECK-NEXT: v_writelane_b32 v7, s16, 52 -; CHECK-NEXT: v_writelane_b32 v7, s17, 53 -; CHECK-NEXT: v_writelane_b32 v7, s18, 54 -; CHECK-NEXT: v_writelane_b32 v7, s19, 55 -; CHECK-NEXT: ; implicit-def: $vgpr6 : SGPR spill to VGPR lane -; CHECK-NEXT: v_writelane_b32 v7, s52, 56 -; CHECK-NEXT: v_writelane_b32 v6, s60, 0 -; CHECK-NEXT: v_writelane_b32 v7, s53, 57 -; CHECK-NEXT: v_writelane_b32 v6, s61, 1 -; CHECK-NEXT: v_writelane_b32 v7, s54, 58 -; CHECK-NEXT: v_writelane_b32 v6, s62, 2 -; CHECK-NEXT: v_writelane_b32 v7, s55, 59 -; CHECK-NEXT: v_writelane_b32 v6, s63, 3 -; CHECK-NEXT: v_writelane_b32 v7, s56, 60 -; CHECK-NEXT: v_writelane_b32 v6, s64, 4 -; CHECK-NEXT: v_writelane_b32 v7, s57, 61 -; CHECK-NEXT: v_writelane_b32 v6, s65, 5 -; CHECK-NEXT: v_writelane_b32 v7, s58, 62 -; CHECK-NEXT: v_writelane_b32 v6, s66, 6 -; CHECK-NEXT: v_writelane_b32 v7, s59, 63 -; CHECK-NEXT: v_writelane_b32 v6, s67, 7 -; CHECK-NEXT: s_andn2_saveexec_b64 s[20:21], s[26:27] +; CHECK-NEXT: s_andn2_saveexec_b64 s[12:13], s[34:35] ; CHECK-NEXT: s_cbranch_execz .LBB0_10 ; CHECK-NEXT: ; %bb.4: ; %bb32 -; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[24:25] -; CHECK-NEXT: s_xor_b64 s[22:23], exec, s[8:9] +; CHECK-NEXT: s_and_saveexec_b64 s[14:15], s[72:73] +; CHECK-NEXT: s_xor_b64 s[14:15], exec, s[14:15] ; CHECK-NEXT: s_cbranch_execz .LBB0_6 ; CHECK-NEXT: ; %bb.5: ; %bb43 -; CHECK-NEXT: s_mov_b32 s8, 0 -; CHECK-NEXT: s_mov_b32 s9, s8 -; CHECK-NEXT: v_mov_b32_e32 v0, s8 -; CHECK-NEXT: v_readlane_b32 s36, v7, 0 -; CHECK-NEXT: v_mov_b32_e32 v1, s9 -; CHECK-NEXT: s_mov_b32 s10, s8 -; CHECK-NEXT: s_mov_b32 s11, s8 -; CHECK-NEXT: v_readlane_b32 s37, v7, 1 -; CHECK-NEXT: v_readlane_b32 s38, v7, 2 -; CHECK-NEXT: v_readlane_b32 s39, v7, 3 -; CHECK-NEXT: v_readlane_b32 s40, v7, 4 -; CHECK-NEXT: v_readlane_b32 s41, v7, 5 -; CHECK-NEXT: v_readlane_b32 s42, v7, 6 -; CHECK-NEXT: v_readlane_b32 s43, v7, 7 -; CHECK-NEXT: v_readlane_b32 s44, v7, 8 -; CHECK-NEXT: v_readlane_b32 s45, v7, 9 -; CHECK-NEXT: v_readlane_b32 s46, v7, 10 -; CHECK-NEXT: v_readlane_b32 s47, v7, 11 -; CHECK-NEXT: v_readlane_b32 s48, v7, 12 -; CHECK-NEXT: v_readlane_b32 s49, v7, 13 -; CHECK-NEXT: v_readlane_b32 s50, v7, 14 -; CHECK-NEXT: v_readlane_b32 s51, v7, 15 -; CHECK-NEXT: image_sample_lz v2, v[0:1], s[36:43], s[8:11] dmask:0x1 -; CHECK-NEXT: v_readlane_b32 s36, v7, 16 -; CHECK-NEXT: v_readlane_b32 s44, v7, 24 -; CHECK-NEXT: v_readlane_b32 s45, v7, 25 -; CHECK-NEXT: v_readlane_b32 s46, v7, 26 -; CHECK-NEXT: v_readlane_b32 s47, v7, 27 -; CHECK-NEXT: v_readlane_b32 s48, v7, 28 -; CHECK-NEXT: v_readlane_b32 s49, v7, 29 -; CHECK-NEXT: v_readlane_b32 s50, v7, 30 -; CHECK-NEXT: v_readlane_b32 s51, v7, 31 -; CHECK-NEXT: v_mov_b32_e32 v3, 0 -; CHECK-NEXT: v_mov_b32_e32 v4, v3 -; CHECK-NEXT: v_readlane_b32 s37, v7, 17 -; CHECK-NEXT: v_readlane_b32 s38, v7, 18 -; CHECK-NEXT: v_readlane_b32 s39, v7, 19 -; CHECK-NEXT: image_sample_lz v0, v[0:1], s[44:51], s[12:15] dmask:0x1 -; CHECK-NEXT: v_readlane_b32 s40, v7, 20 -; CHECK-NEXT: v_readlane_b32 s41, v7, 21 -; CHECK-NEXT: v_readlane_b32 s42, v7, 22 -; CHECK-NEXT: v_readlane_b32 s43, v7, 23 +; CHECK-NEXT: s_mov_b32 s16, 0 +; CHECK-NEXT: s_mov_b32 s17, s16 +; CHECK-NEXT: v_mov_b32_e32 v2, s16 +; CHECK-NEXT: v_mov_b32_e32 v3, s17 +; CHECK-NEXT: s_mov_b32 s18, s16 +; CHECK-NEXT: s_mov_b32 s19, s16 +; CHECK-NEXT: image_sample_lz v1, v[2:3], s[4:11], s[16:19] dmask:0x1 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_mov_b64 s[4:5], s[36:37] +; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[8:9], s[40:41] +; CHECK-NEXT: s_mov_b64 s[10:11], s[42:43] +; CHECK-NEXT: v_readlane_b32 s36, v6, 0 +; CHECK-NEXT: v_readlane_b32 s44, v6, 8 +; CHECK-NEXT: v_readlane_b32 s45, v6, 9 +; CHECK-NEXT: v_readlane_b32 s46, v6, 10 +; CHECK-NEXT: v_readlane_b32 s47, v6, 11 +; CHECK-NEXT: v_readlane_b32 s48, v6, 12 +; CHECK-NEXT: v_readlane_b32 s49, v6, 13 +; CHECK-NEXT: v_readlane_b32 s50, v6, 14 +; CHECK-NEXT: v_readlane_b32 s51, v6, 15 +; CHECK-NEXT: v_readlane_b32 s37, v6, 1 +; CHECK-NEXT: v_readlane_b32 s38, v6, 2 +; CHECK-NEXT: v_readlane_b32 s39, v6, 3 +; CHECK-NEXT: v_readlane_b32 s40, v6, 4 +; CHECK-NEXT: v_readlane_b32 s41, v6, 5 +; CHECK-NEXT: image_sample_lz v0, v[2:3], s[44:51], s[20:23] dmask:0x1 +; CHECK-NEXT: v_readlane_b32 s42, v6, 6 +; CHECK-NEXT: v_readlane_b32 s43, v6, 7 +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: s_mov_b64 s[42:43], s[10:11] +; CHECK-NEXT: v_mov_b32_e32 v3, v2 +; CHECK-NEXT: s_mov_b64 s[40:41], s[8:9] +; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] +; CHECK-NEXT: s_mov_b64 s[36:37], s[4:5] ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: buffer_store_dwordx3 v[2:4], off, s[8:11], 0 +; CHECK-NEXT: buffer_store_dwordx3 v[1:3], off, s[16:19], 0 ; CHECK-NEXT: s_waitcnt vmcnt(1) -; CHECK-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 +; CHECK-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 ; CHECK-NEXT: ; implicit-def: $vgpr0 ; CHECK-NEXT: .LBB0_6: ; %Flow12 -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[22:23] -; CHECK-NEXT: v_readlane_b32 s52, v7, 40 -; CHECK-NEXT: v_readlane_b32 s53, v7, 41 -; CHECK-NEXT: v_readlane_b32 s54, v7, 42 -; CHECK-NEXT: v_readlane_b32 s55, v7, 43 -; CHECK-NEXT: v_readlane_b32 s56, v7, 44 -; CHECK-NEXT: v_readlane_b32 s57, v7, 45 -; CHECK-NEXT: v_readlane_b32 s58, v7, 46 -; CHECK-NEXT: v_readlane_b32 s59, v7, 47 -; CHECK-NEXT: v_readlane_b32 s60, v7, 48 -; CHECK-NEXT: v_readlane_b32 s61, v7, 49 -; CHECK-NEXT: v_readlane_b32 s62, v7, 50 -; CHECK-NEXT: v_readlane_b32 s63, v7, 51 -; CHECK-NEXT: v_readlane_b32 s64, v7, 52 -; CHECK-NEXT: v_readlane_b32 s65, v7, 53 -; CHECK-NEXT: v_readlane_b32 s66, v7, 54 -; CHECK-NEXT: v_readlane_b32 s67, v7, 55 -; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5] +; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[14:15] ; CHECK-NEXT: s_cbranch_execz .LBB0_9 ; CHECK-NEXT: ; %bb.7: ; %bb33.preheader ; CHECK-NEXT: s_mov_b32 s8, 0 ; CHECK-NEXT: s_mov_b32 s6, s8 ; CHECK-NEXT: s_mov_b32 s7, s8 ; CHECK-NEXT: v_mov_b32_e32 v1, s6 -; CHECK-NEXT: v_readlane_b32 s36, v7, 56 ; CHECK-NEXT: s_mov_b32 s9, s8 ; CHECK-NEXT: s_mov_b32 s10, s8 ; CHECK-NEXT: s_mov_b32 s11, s8 ; CHECK-NEXT: v_mov_b32_e32 v2, s7 -; CHECK-NEXT: v_readlane_b32 s37, v7, 57 -; CHECK-NEXT: v_readlane_b32 s38, v7, 58 -; CHECK-NEXT: v_readlane_b32 s39, v7, 59 -; CHECK-NEXT: v_readlane_b32 s40, v7, 60 -; CHECK-NEXT: v_readlane_b32 s41, v7, 61 -; CHECK-NEXT: v_readlane_b32 s42, v7, 62 -; CHECK-NEXT: v_readlane_b32 s43, v7, 63 -; CHECK-NEXT: s_nop 4 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: image_sample_lz v3, v[1:2], s[36:43], s[8:11] dmask:0x1 ; CHECK-NEXT: image_sample_lz v4, v[1:2], s[52:59], s[8:11] dmask:0x1 -; CHECK-NEXT: ; kill: killed $vgpr1_vgpr2 -; CHECK-NEXT: s_mov_b64 s[12:13], s[36:37] ; CHECK-NEXT: s_and_b64 vcc, exec, 0 -; CHECK-NEXT: v_readlane_b32 s44, v6, 0 -; CHECK-NEXT: v_readlane_b32 s45, v6, 1 -; CHECK-NEXT: v_readlane_b32 s46, v6, 2 -; CHECK-NEXT: v_readlane_b32 s47, v6, 3 -; CHECK-NEXT: v_readlane_b32 s48, v6, 4 -; CHECK-NEXT: v_readlane_b32 s49, v6, 5 -; CHECK-NEXT: v_readlane_b32 s50, v6, 6 -; CHECK-NEXT: v_readlane_b32 s51, v6, 7 -; CHECK-NEXT: s_mov_b64 s[14:15], s[38:39] -; CHECK-NEXT: s_mov_b64 s[16:17], s[40:41] -; CHECK-NEXT: s_mov_b64 s[18:19], s[42:43] -; CHECK-NEXT: ; kill: killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 -; CHECK-NEXT: ; kill: killed $sgpr8_sgpr9_sgpr10 killed $sgpr11 -; CHECK-NEXT: ; kill: killed $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: v_sub_f32_e32 v1, v4, v3 ; CHECK-NEXT: v_mul_f32_e32 v0, v1, v0 @@ -315,27 +183,33 @@ define void @main(i1 %arg) #0 { ; CHECK-NEXT: .LBB0_9: ; %Flow13 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: .LBB0_10: ; %UnifiedReturnBlock -; CHECK-NEXT: s_or_b64 exec, exec, s[20:21] -; CHECK-NEXT: v_readlane_b32 s67, v5, 15 -; CHECK-NEXT: v_readlane_b32 s66, v5, 14 -; CHECK-NEXT: v_readlane_b32 s65, v5, 13 -; CHECK-NEXT: v_readlane_b32 s64, v5, 12 -; CHECK-NEXT: v_readlane_b32 s55, v5, 11 -; CHECK-NEXT: v_readlane_b32 s54, v5, 10 -; CHECK-NEXT: v_readlane_b32 s53, v5, 9 -; CHECK-NEXT: v_readlane_b32 s52, v5, 8 -; CHECK-NEXT: v_readlane_b32 s51, v5, 7 -; CHECK-NEXT: v_readlane_b32 s50, v5, 6 -; CHECK-NEXT: v_readlane_b32 s49, v5, 5 -; CHECK-NEXT: v_readlane_b32 s48, v5, 4 -; CHECK-NEXT: v_readlane_b32 s39, v5, 3 -; CHECK-NEXT: v_readlane_b32 s38, v5, 2 -; CHECK-NEXT: v_readlane_b32 s37, v5, 1 -; CHECK-NEXT: v_readlane_b32 s36, v5, 0 +; CHECK-NEXT: s_or_b64 exec, exec, s[12:13] +; CHECK-NEXT: v_readlane_b32 s69, v5, 21 +; CHECK-NEXT: v_readlane_b32 s68, v5, 20 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_readlane_b32 s67, v5, 19 +; CHECK-NEXT: v_readlane_b32 s66, v5, 18 +; CHECK-NEXT: v_readlane_b32 s65, v5, 17 +; CHECK-NEXT: v_readlane_b32 s64, v5, 16 +; CHECK-NEXT: v_readlane_b32 s55, v5, 15 +; CHECK-NEXT: v_readlane_b32 s54, v5, 14 +; CHECK-NEXT: v_readlane_b32 s53, v5, 13 +; CHECK-NEXT: v_readlane_b32 s52, v5, 12 +; CHECK-NEXT: v_readlane_b32 s51, v5, 11 +; CHECK-NEXT: v_readlane_b32 s50, v5, 10 +; CHECK-NEXT: v_readlane_b32 s49, v5, 9 +; CHECK-NEXT: v_readlane_b32 s48, v5, 8 +; CHECK-NEXT: v_readlane_b32 s39, v5, 7 +; CHECK-NEXT: v_readlane_b32 s38, v5, 6 +; CHECK-NEXT: v_readlane_b32 s37, v5, 5 +; CHECK-NEXT: v_readlane_b32 s36, v5, 4 +; CHECK-NEXT: v_readlane_b32 s35, v5, 3 +; CHECK-NEXT: v_readlane_b32 s34, v5, 2 +; CHECK-NEXT: v_readlane_b32 s31, v5, 1 +; CHECK-NEXT: v_readlane_b32 s30, v5, 0 ; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v5, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/issue48473.mir b/llvm/test/CodeGen/AMDGPU/issue48473.mir index b447272702641..654461a62fa9f 100644 --- a/llvm/test/CodeGen/AMDGPU/issue48473.mir +++ b/llvm/test/CodeGen/AMDGPU/issue48473.mir @@ -43,7 +43,7 @@ # %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 # CHECK-LABEL: name: issue48473 -# CHECK: S_NOP 0, implicit killed renamable $sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, implicit killed renamable $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, implicit killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55, implicit renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 +# CHECK: S_NOP 0, implicit killed renamable $sgpr20_sgpr21_sgpr22_sgpr23, implicit killed renamable $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit killed renamable $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit killed renamable $sgpr40_sgpr41_sgpr42_sgpr43, implicit killed renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit killed renamable $sgpr36_sgpr37_sgpr38_sgpr39, implicit killed renamable $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, implicit killed renamable $sgpr80_sgpr81_sgpr82_sgpr83, implicit killed renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, implicit killed renamable $sgpr16_sgpr17_sgpr18_sgpr19, implicit killed renamable $sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, implicit killed renamable $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55, implicit renamable $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed renamable $sgpr44_sgpr45_sgpr46_sgpr47, implicit killed renamable $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 --- name: issue48473 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll index a3bd0aabd5c3f..55fa02a0c582c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll @@ -815,50 +815,27 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-SDAG-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; CHECK-SDAG-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s36, 0 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s37, 1 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s38, 2 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s39, 3 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s40, 4 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s41, 5 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s42, 6 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s43, 7 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s44, 8 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s45, 9 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s46, 10 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s47, 11 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s48, 12 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s49, 13 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s50, 14 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s51, 15 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s52, 16 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s53, 17 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s54, 18 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s55, 19 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s56, 20 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s57, 21 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s58, 22 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s59, 23 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s60, 24 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s61, 25 ; CHECK-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; CHECK-SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s61, v27 ; CHECK-SDAG-NEXT: buffer_load_dword v27, off, s[0:3], s32 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s62, 26 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s63, 27 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s64, 28 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s65, 29 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s66, 30 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s67, 31 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s36, 0 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s37, 1 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s38, 2 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s39, 3 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s48, 4 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s49, 5 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s50, 6 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s51, 7 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s52, 8 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s53, 9 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s54, 10 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s55, 11 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s64, 12 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s65, 13 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s66, 14 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s67, 15 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s64, v30 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s63, v29 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s62, v28 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s60, v26 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s59, v25 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s58, v24 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s57, v23 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s56, v22 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s55, v21 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s54, v20 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s53, v19 @@ -867,6 +844,17 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s50, v16 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s49, v15 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s48, v14 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s39, v5 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s38, v4 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s37, v3 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s36, v2 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s63, v29 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s62, v28 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s60, v26 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s59, v25 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s58, v24 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s57, v23 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s56, v22 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s47, v13 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s46, v12 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s45, v11 @@ -875,10 +863,6 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s42, v8 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s41, v7 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s40, v6 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s39, v5 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s38, v4 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s37, v3 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s36, v2 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(2) ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s66, v0 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(1) @@ -888,34 +872,18 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-SDAG-NEXT: ;;#ASMSTART ; CHECK-SDAG-NEXT: ; use s[36:67] ; CHECK-SDAG-NEXT: ;;#ASMEND -; CHECK-SDAG-NEXT: v_readlane_b32 s67, v31, 31 -; CHECK-SDAG-NEXT: v_readlane_b32 s66, v31, 30 -; CHECK-SDAG-NEXT: v_readlane_b32 s65, v31, 29 -; CHECK-SDAG-NEXT: v_readlane_b32 s64, v31, 28 -; CHECK-SDAG-NEXT: v_readlane_b32 s63, v31, 27 -; CHECK-SDAG-NEXT: v_readlane_b32 s62, v31, 26 -; CHECK-SDAG-NEXT: v_readlane_b32 s61, v31, 25 -; CHECK-SDAG-NEXT: v_readlane_b32 s60, v31, 24 -; CHECK-SDAG-NEXT: v_readlane_b32 s59, v31, 23 -; CHECK-SDAG-NEXT: v_readlane_b32 s58, v31, 22 -; CHECK-SDAG-NEXT: v_readlane_b32 s57, v31, 21 -; CHECK-SDAG-NEXT: v_readlane_b32 s56, v31, 20 -; CHECK-SDAG-NEXT: v_readlane_b32 s55, v31, 19 -; CHECK-SDAG-NEXT: v_readlane_b32 s54, v31, 18 -; CHECK-SDAG-NEXT: v_readlane_b32 s53, v31, 17 -; CHECK-SDAG-NEXT: v_readlane_b32 s52, v31, 16 -; CHECK-SDAG-NEXT: v_readlane_b32 s51, v31, 15 -; CHECK-SDAG-NEXT: v_readlane_b32 s50, v31, 14 -; CHECK-SDAG-NEXT: v_readlane_b32 s49, v31, 13 -; CHECK-SDAG-NEXT: v_readlane_b32 s48, v31, 12 -; CHECK-SDAG-NEXT: v_readlane_b32 s47, v31, 11 -; CHECK-SDAG-NEXT: v_readlane_b32 s46, v31, 10 -; CHECK-SDAG-NEXT: v_readlane_b32 s45, v31, 9 -; CHECK-SDAG-NEXT: v_readlane_b32 s44, v31, 8 -; CHECK-SDAG-NEXT: v_readlane_b32 s43, v31, 7 -; CHECK-SDAG-NEXT: v_readlane_b32 s42, v31, 6 -; CHECK-SDAG-NEXT: v_readlane_b32 s41, v31, 5 -; CHECK-SDAG-NEXT: v_readlane_b32 s40, v31, 4 +; CHECK-SDAG-NEXT: v_readlane_b32 s67, v31, 15 +; CHECK-SDAG-NEXT: v_readlane_b32 s66, v31, 14 +; CHECK-SDAG-NEXT: v_readlane_b32 s65, v31, 13 +; CHECK-SDAG-NEXT: v_readlane_b32 s64, v31, 12 +; CHECK-SDAG-NEXT: v_readlane_b32 s55, v31, 11 +; CHECK-SDAG-NEXT: v_readlane_b32 s54, v31, 10 +; CHECK-SDAG-NEXT: v_readlane_b32 s53, v31, 9 +; CHECK-SDAG-NEXT: v_readlane_b32 s52, v31, 8 +; CHECK-SDAG-NEXT: v_readlane_b32 s51, v31, 7 +; CHECK-SDAG-NEXT: v_readlane_b32 s50, v31, 6 +; CHECK-SDAG-NEXT: v_readlane_b32 s49, v31, 5 +; CHECK-SDAG-NEXT: v_readlane_b32 s48, v31, 4 ; CHECK-SDAG-NEXT: v_readlane_b32 s39, v31, 3 ; CHECK-SDAG-NEXT: v_readlane_b32 s38, v31, 2 ; CHECK-SDAG-NEXT: v_readlane_b32 s37, v31, 1 @@ -940,45 +908,21 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s37, 1 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s38, 2 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s39, 3 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s40, 4 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s41, 5 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s42, 6 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s43, 7 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s44, 8 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s45, 9 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s46, 10 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s47, 11 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s48, 12 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s49, 13 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s50, 14 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s51, 15 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s52, 16 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s53, 17 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s54, 18 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s55, 19 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s56, 20 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s57, 21 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s58, 22 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s59, 23 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s60, 24 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s61, 25 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s62, 26 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s63, 27 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s64, 28 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s65, 29 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s66, 30 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s67, 31 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s48, 4 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s49, 5 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s50, 6 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s51, 7 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s52, 8 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s53, 9 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s54, 10 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s55, 11 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s64, 12 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s65, 13 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s66, 14 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s67, 15 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s37, v3 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s38, v4 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s39, v5 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s40, v6 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s41, v7 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s42, v8 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s43, v9 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s44, v10 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s45, v11 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s46, v12 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s47, v13 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s48, v14 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s49, v15 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s50, v16 @@ -987,6 +931,15 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s53, v19 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s54, v20 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s55, v21 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s64, v30 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s40, v6 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s41, v7 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s42, v8 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s43, v9 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s44, v10 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s45, v11 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s46, v12 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s47, v13 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s56, v22 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s57, v23 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s58, v24 @@ -995,7 +948,6 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s61, v27 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s62, v28 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s63, v29 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s64, v30 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(2) ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s65, v0 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(1) @@ -1005,34 +957,18 @@ define void @test_readfirstlane_v32f32(ptr addrspace(1) %out, <32 x float> %src) ; CHECK-GISEL-NEXT: ;;#ASMSTART ; CHECK-GISEL-NEXT: ; use s[36:67] ; CHECK-GISEL-NEXT: ;;#ASMEND -; CHECK-GISEL-NEXT: v_readlane_b32 s67, v31, 31 -; CHECK-GISEL-NEXT: v_readlane_b32 s66, v31, 30 -; CHECK-GISEL-NEXT: v_readlane_b32 s65, v31, 29 -; CHECK-GISEL-NEXT: v_readlane_b32 s64, v31, 28 -; CHECK-GISEL-NEXT: v_readlane_b32 s63, v31, 27 -; CHECK-GISEL-NEXT: v_readlane_b32 s62, v31, 26 -; CHECK-GISEL-NEXT: v_readlane_b32 s61, v31, 25 -; CHECK-GISEL-NEXT: v_readlane_b32 s60, v31, 24 -; CHECK-GISEL-NEXT: v_readlane_b32 s59, v31, 23 -; CHECK-GISEL-NEXT: v_readlane_b32 s58, v31, 22 -; CHECK-GISEL-NEXT: v_readlane_b32 s57, v31, 21 -; CHECK-GISEL-NEXT: v_readlane_b32 s56, v31, 20 -; CHECK-GISEL-NEXT: v_readlane_b32 s55, v31, 19 -; CHECK-GISEL-NEXT: v_readlane_b32 s54, v31, 18 -; CHECK-GISEL-NEXT: v_readlane_b32 s53, v31, 17 -; CHECK-GISEL-NEXT: v_readlane_b32 s52, v31, 16 -; CHECK-GISEL-NEXT: v_readlane_b32 s51, v31, 15 -; CHECK-GISEL-NEXT: v_readlane_b32 s50, v31, 14 -; CHECK-GISEL-NEXT: v_readlane_b32 s49, v31, 13 -; CHECK-GISEL-NEXT: v_readlane_b32 s48, v31, 12 -; CHECK-GISEL-NEXT: v_readlane_b32 s47, v31, 11 -; CHECK-GISEL-NEXT: v_readlane_b32 s46, v31, 10 -; CHECK-GISEL-NEXT: v_readlane_b32 s45, v31, 9 -; CHECK-GISEL-NEXT: v_readlane_b32 s44, v31, 8 -; CHECK-GISEL-NEXT: v_readlane_b32 s43, v31, 7 -; CHECK-GISEL-NEXT: v_readlane_b32 s42, v31, 6 -; CHECK-GISEL-NEXT: v_readlane_b32 s41, v31, 5 -; CHECK-GISEL-NEXT: v_readlane_b32 s40, v31, 4 +; CHECK-GISEL-NEXT: v_readlane_b32 s67, v31, 15 +; CHECK-GISEL-NEXT: v_readlane_b32 s66, v31, 14 +; CHECK-GISEL-NEXT: v_readlane_b32 s65, v31, 13 +; CHECK-GISEL-NEXT: v_readlane_b32 s64, v31, 12 +; CHECK-GISEL-NEXT: v_readlane_b32 s55, v31, 11 +; CHECK-GISEL-NEXT: v_readlane_b32 s54, v31, 10 +; CHECK-GISEL-NEXT: v_readlane_b32 s53, v31, 9 +; CHECK-GISEL-NEXT: v_readlane_b32 s52, v31, 8 +; CHECK-GISEL-NEXT: v_readlane_b32 s51, v31, 7 +; CHECK-GISEL-NEXT: v_readlane_b32 s50, v31, 6 +; CHECK-GISEL-NEXT: v_readlane_b32 s49, v31, 5 +; CHECK-GISEL-NEXT: v_readlane_b32 s48, v31, 4 ; CHECK-GISEL-NEXT: v_readlane_b32 s39, v31, 3 ; CHECK-GISEL-NEXT: v_readlane_b32 s38, v31, 2 ; CHECK-GISEL-NEXT: v_readlane_b32 s37, v31, 1 @@ -1324,50 +1260,27 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-SDAG-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; CHECK-SDAG-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; CHECK-SDAG-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s36, 0 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s37, 1 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s38, 2 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s39, 3 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s40, 4 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s41, 5 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s42, 6 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s43, 7 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s44, 8 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s45, 9 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s46, 10 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s47, 11 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s48, 12 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s49, 13 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s50, 14 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s51, 15 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s52, 16 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s53, 17 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s54, 18 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s55, 19 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s56, 20 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s57, 21 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s58, 22 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s59, 23 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s60, 24 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s61, 25 ; CHECK-SDAG-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; CHECK-SDAG-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s61, v27 ; CHECK-SDAG-NEXT: buffer_load_dword v27, off, s[0:3], s32 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s62, 26 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s63, 27 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s64, 28 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s65, 29 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s66, 30 -; CHECK-SDAG-NEXT: v_writelane_b32 v31, s67, 31 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s36, 0 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s37, 1 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s38, 2 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s39, 3 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s48, 4 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s49, 5 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s50, 6 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s51, 7 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s52, 8 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s53, 9 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s54, 10 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s55, 11 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s64, 12 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s65, 13 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s66, 14 +; CHECK-SDAG-NEXT: v_writelane_b32 v31, s67, 15 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s64, v30 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s63, v29 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s62, v28 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s60, v26 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s59, v25 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s58, v24 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s57, v23 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s56, v22 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s55, v21 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s54, v20 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s53, v19 @@ -1376,6 +1289,17 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s50, v16 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s49, v15 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s48, v14 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s39, v5 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s38, v4 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s37, v3 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s36, v2 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s63, v29 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s62, v28 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s60, v26 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s59, v25 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s58, v24 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s57, v23 +; CHECK-SDAG-NEXT: v_readfirstlane_b32 s56, v22 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s47, v13 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s46, v12 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s45, v11 @@ -1384,10 +1308,6 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s42, v8 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s41, v7 ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s40, v6 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s39, v5 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s38, v4 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s37, v3 -; CHECK-SDAG-NEXT: v_readfirstlane_b32 s36, v2 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(2) ; CHECK-SDAG-NEXT: v_readfirstlane_b32 s66, v0 ; CHECK-SDAG-NEXT: s_waitcnt vmcnt(1) @@ -1397,34 +1317,18 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-SDAG-NEXT: ;;#ASMSTART ; CHECK-SDAG-NEXT: ; use s[36:67] ; CHECK-SDAG-NEXT: ;;#ASMEND -; CHECK-SDAG-NEXT: v_readlane_b32 s67, v31, 31 -; CHECK-SDAG-NEXT: v_readlane_b32 s66, v31, 30 -; CHECK-SDAG-NEXT: v_readlane_b32 s65, v31, 29 -; CHECK-SDAG-NEXT: v_readlane_b32 s64, v31, 28 -; CHECK-SDAG-NEXT: v_readlane_b32 s63, v31, 27 -; CHECK-SDAG-NEXT: v_readlane_b32 s62, v31, 26 -; CHECK-SDAG-NEXT: v_readlane_b32 s61, v31, 25 -; CHECK-SDAG-NEXT: v_readlane_b32 s60, v31, 24 -; CHECK-SDAG-NEXT: v_readlane_b32 s59, v31, 23 -; CHECK-SDAG-NEXT: v_readlane_b32 s58, v31, 22 -; CHECK-SDAG-NEXT: v_readlane_b32 s57, v31, 21 -; CHECK-SDAG-NEXT: v_readlane_b32 s56, v31, 20 -; CHECK-SDAG-NEXT: v_readlane_b32 s55, v31, 19 -; CHECK-SDAG-NEXT: v_readlane_b32 s54, v31, 18 -; CHECK-SDAG-NEXT: v_readlane_b32 s53, v31, 17 -; CHECK-SDAG-NEXT: v_readlane_b32 s52, v31, 16 -; CHECK-SDAG-NEXT: v_readlane_b32 s51, v31, 15 -; CHECK-SDAG-NEXT: v_readlane_b32 s50, v31, 14 -; CHECK-SDAG-NEXT: v_readlane_b32 s49, v31, 13 -; CHECK-SDAG-NEXT: v_readlane_b32 s48, v31, 12 -; CHECK-SDAG-NEXT: v_readlane_b32 s47, v31, 11 -; CHECK-SDAG-NEXT: v_readlane_b32 s46, v31, 10 -; CHECK-SDAG-NEXT: v_readlane_b32 s45, v31, 9 -; CHECK-SDAG-NEXT: v_readlane_b32 s44, v31, 8 -; CHECK-SDAG-NEXT: v_readlane_b32 s43, v31, 7 -; CHECK-SDAG-NEXT: v_readlane_b32 s42, v31, 6 -; CHECK-SDAG-NEXT: v_readlane_b32 s41, v31, 5 -; CHECK-SDAG-NEXT: v_readlane_b32 s40, v31, 4 +; CHECK-SDAG-NEXT: v_readlane_b32 s67, v31, 15 +; CHECK-SDAG-NEXT: v_readlane_b32 s66, v31, 14 +; CHECK-SDAG-NEXT: v_readlane_b32 s65, v31, 13 +; CHECK-SDAG-NEXT: v_readlane_b32 s64, v31, 12 +; CHECK-SDAG-NEXT: v_readlane_b32 s55, v31, 11 +; CHECK-SDAG-NEXT: v_readlane_b32 s54, v31, 10 +; CHECK-SDAG-NEXT: v_readlane_b32 s53, v31, 9 +; CHECK-SDAG-NEXT: v_readlane_b32 s52, v31, 8 +; CHECK-SDAG-NEXT: v_readlane_b32 s51, v31, 7 +; CHECK-SDAG-NEXT: v_readlane_b32 s50, v31, 6 +; CHECK-SDAG-NEXT: v_readlane_b32 s49, v31, 5 +; CHECK-SDAG-NEXT: v_readlane_b32 s48, v31, 4 ; CHECK-SDAG-NEXT: v_readlane_b32 s39, v31, 3 ; CHECK-SDAG-NEXT: v_readlane_b32 s38, v31, 2 ; CHECK-SDAG-NEXT: v_readlane_b32 s37, v31, 1 @@ -1449,45 +1353,21 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s37, 1 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s38, 2 ; CHECK-GISEL-NEXT: v_writelane_b32 v31, s39, 3 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s40, 4 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s41, 5 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s42, 6 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s43, 7 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s44, 8 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s45, 9 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s46, 10 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s47, 11 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s48, 12 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s49, 13 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s50, 14 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s51, 15 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s52, 16 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s53, 17 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s54, 18 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s55, 19 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s56, 20 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s57, 21 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s58, 22 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s59, 23 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s60, 24 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s61, 25 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s62, 26 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s63, 27 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s64, 28 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s65, 29 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s66, 30 -; CHECK-GISEL-NEXT: v_writelane_b32 v31, s67, 31 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s48, 4 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s49, 5 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s50, 6 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s51, 7 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s52, 8 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s53, 9 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s54, 10 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s55, 11 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s64, 12 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s65, 13 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s66, 14 +; CHECK-GISEL-NEXT: v_writelane_b32 v31, s67, 15 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s37, v3 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s38, v4 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s39, v5 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s40, v6 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s41, v7 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s42, v8 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s43, v9 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s44, v10 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s45, v11 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s46, v12 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s47, v13 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s48, v14 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s49, v15 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s50, v16 @@ -1496,6 +1376,15 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s53, v19 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s54, v20 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s55, v21 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s64, v30 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s40, v6 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s41, v7 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s42, v8 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s43, v9 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s44, v10 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s45, v11 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s46, v12 +; CHECK-GISEL-NEXT: v_readfirstlane_b32 s47, v13 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s56, v22 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s57, v23 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s58, v24 @@ -1504,7 +1393,6 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s61, v27 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s62, v28 ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s63, v29 -; CHECK-GISEL-NEXT: v_readfirstlane_b32 s64, v30 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(2) ; CHECK-GISEL-NEXT: v_readfirstlane_b32 s65, v0 ; CHECK-GISEL-NEXT: s_waitcnt vmcnt(1) @@ -1514,34 +1402,18 @@ define void @test_readfirstlane_v32i32(ptr addrspace(1) %out, <32 x i32> %src) { ; CHECK-GISEL-NEXT: ;;#ASMSTART ; CHECK-GISEL-NEXT: ; use s[36:67] ; CHECK-GISEL-NEXT: ;;#ASMEND -; CHECK-GISEL-NEXT: v_readlane_b32 s67, v31, 31 -; CHECK-GISEL-NEXT: v_readlane_b32 s66, v31, 30 -; CHECK-GISEL-NEXT: v_readlane_b32 s65, v31, 29 -; CHECK-GISEL-NEXT: v_readlane_b32 s64, v31, 28 -; CHECK-GISEL-NEXT: v_readlane_b32 s63, v31, 27 -; CHECK-GISEL-NEXT: v_readlane_b32 s62, v31, 26 -; CHECK-GISEL-NEXT: v_readlane_b32 s61, v31, 25 -; CHECK-GISEL-NEXT: v_readlane_b32 s60, v31, 24 -; CHECK-GISEL-NEXT: v_readlane_b32 s59, v31, 23 -; CHECK-GISEL-NEXT: v_readlane_b32 s58, v31, 22 -; CHECK-GISEL-NEXT: v_readlane_b32 s57, v31, 21 -; CHECK-GISEL-NEXT: v_readlane_b32 s56, v31, 20 -; CHECK-GISEL-NEXT: v_readlane_b32 s55, v31, 19 -; CHECK-GISEL-NEXT: v_readlane_b32 s54, v31, 18 -; CHECK-GISEL-NEXT: v_readlane_b32 s53, v31, 17 -; CHECK-GISEL-NEXT: v_readlane_b32 s52, v31, 16 -; CHECK-GISEL-NEXT: v_readlane_b32 s51, v31, 15 -; CHECK-GISEL-NEXT: v_readlane_b32 s50, v31, 14 -; CHECK-GISEL-NEXT: v_readlane_b32 s49, v31, 13 -; CHECK-GISEL-NEXT: v_readlane_b32 s48, v31, 12 -; CHECK-GISEL-NEXT: v_readlane_b32 s47, v31, 11 -; CHECK-GISEL-NEXT: v_readlane_b32 s46, v31, 10 -; CHECK-GISEL-NEXT: v_readlane_b32 s45, v31, 9 -; CHECK-GISEL-NEXT: v_readlane_b32 s44, v31, 8 -; CHECK-GISEL-NEXT: v_readlane_b32 s43, v31, 7 -; CHECK-GISEL-NEXT: v_readlane_b32 s42, v31, 6 -; CHECK-GISEL-NEXT: v_readlane_b32 s41, v31, 5 -; CHECK-GISEL-NEXT: v_readlane_b32 s40, v31, 4 +; CHECK-GISEL-NEXT: v_readlane_b32 s67, v31, 15 +; CHECK-GISEL-NEXT: v_readlane_b32 s66, v31, 14 +; CHECK-GISEL-NEXT: v_readlane_b32 s65, v31, 13 +; CHECK-GISEL-NEXT: v_readlane_b32 s64, v31, 12 +; CHECK-GISEL-NEXT: v_readlane_b32 s55, v31, 11 +; CHECK-GISEL-NEXT: v_readlane_b32 s54, v31, 10 +; CHECK-GISEL-NEXT: v_readlane_b32 s53, v31, 9 +; CHECK-GISEL-NEXT: v_readlane_b32 s52, v31, 8 +; CHECK-GISEL-NEXT: v_readlane_b32 s51, v31, 7 +; CHECK-GISEL-NEXT: v_readlane_b32 s50, v31, 6 +; CHECK-GISEL-NEXT: v_readlane_b32 s49, v31, 5 +; CHECK-GISEL-NEXT: v_readlane_b32 s48, v31, 4 ; CHECK-GISEL-NEXT: v_readlane_b32 s39, v31, 3 ; CHECK-GISEL-NEXT: v_readlane_b32 s38, v31, 2 ; CHECK-GISEL-NEXT: v_readlane_b32 s37, v31, 1 diff --git a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll index 04da358a49bc3..0ad9573ff27cd 100644 --- a/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll +++ b/llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll @@ -34,10 +34,9 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_mov_b64 s[36:37], s[6:7] ; GLOBALNESS1-NEXT: s_load_dwordx4 s[52:55], s[8:9], 0x0 ; GLOBALNESS1-NEXT: s_load_dword s6, s[8:9], 0x14 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v42, 0 -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS1-NEXT: global_store_dword v[0:1], v42, off +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[44:45], 0, 0 +; GLOBALNESS1-NEXT: global_store_dword v[44:45], v42, off ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS1-NEXT: global_load_dword v2, v42, s[52:53] ; GLOBALNESS1-NEXT: s_mov_b64 s[48:49], s[4:5] @@ -46,6 +45,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_add_u32 flat_scratch_lo, s12, s17 ; GLOBALNESS1-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 ; GLOBALNESS1-NEXT: s_add_u32 s0, s0, s17 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v41, v0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS1-NEXT: s_addc_u32 s1, s1, 0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x40994400 @@ -70,19 +70,21 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GLOBALNESS1-NEXT: s_mov_b64 s[38:39], s[8:9] ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1 -; GLOBALNESS1-NEXT: ; implicit-def: $vgpr56 : SGPR spill to VGPR lane +; GLOBALNESS1-NEXT: ; implicit-def: $vgpr59 : SGPR spill to VGPR lane ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] -; GLOBALNESS1-NEXT: v_writelane_b32 v56, s8, 0 +; GLOBALNESS1-NEXT: v_writelane_b32 v59, s8, 0 ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[68:69], 1, v0 -; GLOBALNESS1-NEXT: v_writelane_b32 v56, s9, 1 +; GLOBALNESS1-NEXT: v_writelane_b32 v59, s9, 1 ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[70:71], 1, v3 +; GLOBALNESS1-NEXT: v_mov_b32_e32 v46, 0x80 ; GLOBALNESS1-NEXT: s_mov_b32 s82, s16 ; GLOBALNESS1-NEXT: s_mov_b32 s83, s15 ; GLOBALNESS1-NEXT: s_mov_b32 s84, s14 ; GLOBALNESS1-NEXT: s_mov_b64 s[34:35], s[10:11] +; GLOBALNESS1-NEXT: v_mov_b32_e32 v47, 0 ; GLOBALNESS1-NEXT: s_mov_b32 s32, 0 -; GLOBALNESS1-NEXT: ; implicit-def: $vgpr44_vgpr45 +; GLOBALNESS1-NEXT: ; implicit-def: $vgpr56_vgpr57 ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) ; GLOBALNESS1-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc @@ -91,45 +93,43 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GLOBALNESS1-NEXT: v_writelane_b32 v56, s4, 2 +; GLOBALNESS1-NEXT: v_writelane_b32 v59, s4, 2 ; GLOBALNESS1-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GLOBALNESS1-NEXT: v_writelane_b32 v56, s5, 3 +; GLOBALNESS1-NEXT: v_writelane_b32 v59, s5, 3 ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v3 ; GLOBALNESS1-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GLOBALNESS1-NEXT: v_writelane_b32 v56, s4, 4 -; GLOBALNESS1-NEXT: v_writelane_b32 v56, s5, 5 +; GLOBALNESS1-NEXT: v_writelane_b32 v59, s4, 4 +; GLOBALNESS1-NEXT: v_writelane_b32 v59, s5, 5 ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v2 -; GLOBALNESS1-NEXT: v_writelane_b32 v56, s4, 6 -; GLOBALNESS1-NEXT: v_writelane_b32 v56, s5, 7 +; GLOBALNESS1-NEXT: v_writelane_b32 v59, s4, 6 +; GLOBALNESS1-NEXT: v_writelane_b32 v59, s5, 7 ; GLOBALNESS1-NEXT: v_cmp_ne_u32_e64 s[80:81], 1, v1 -; GLOBALNESS1-NEXT: v_writelane_b32 v56, s70, 8 -; GLOBALNESS1-NEXT: v_writelane_b32 v56, s71, 9 +; GLOBALNESS1-NEXT: v_writelane_b32 v59, s70, 8 +; GLOBALNESS1-NEXT: v_writelane_b32 v59, s71, 9 ; GLOBALNESS1-NEXT: s_branch .LBB1_4 ; GLOBALNESS1-NEXT: .LBB1_1: ; %bb70.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: v_readlane_b32 s6, v56, 6 -; GLOBALNESS1-NEXT: v_readlane_b32 s7, v56, 7 +; GLOBALNESS1-NEXT: v_readlane_b32 s6, v59, 6 +; GLOBALNESS1-NEXT: v_readlane_b32 s7, v59, 7 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] -; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_29 +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_28 ; GLOBALNESS1-NEXT: .LBB1_2: ; %Flow15 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[4:5] -; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], 0 +; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], 0 ; GLOBALNESS1-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS1-NEXT: .LBB1_3: ; %Flow28 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[44:45], v[0:1], v[0:1] op_sel:[0,1] -; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_30 +; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[8:9] +; GLOBALNESS1-NEXT: v_pk_mov_b32 v[56:57], v[0:1], v[0:1] op_sel:[0,1] +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_29 ; GLOBALNESS1-NEXT: .LBB1_4: ; %bb5 ; GLOBALNESS1-NEXT: ; =>This Loop Header: Depth=1 ; GLOBALNESS1-NEXT: ; Child Loop BB1_16 Depth 2 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0x80 -; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0 -; GLOBALNESS1-NEXT: flat_load_dword v40, v[0:1] +; GLOBALNESS1-NEXT: flat_load_dword v40, v[46:47] ; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 ; GLOBALNESS1-NEXT: buffer_store_dword v42, off, s[0:3], 0 -; GLOBALNESS1-NEXT: flat_load_dword v46, v[0:1] +; GLOBALNESS1-NEXT: flat_load_dword v58, v[46:47] ; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS1-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 @@ -169,33 +169,33 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS1-NEXT: .LBB1_9: ; %Flow25 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], -1 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] -; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_24 +; GLOBALNESS1-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_3 ; GLOBALNESS1-NEXT: ; %bb.10: ; %baz.exit.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 -; GLOBALNESS1-NEXT: flat_load_dword v0, v[2:3] +; GLOBALNESS1-NEXT: flat_load_dword v0, v[44:45] ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GLOBALNESS1-NEXT: v_cmp_gt_i32_e64 s[86:87], 0, v0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v1, 0x3ff00000 ; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[52:53], s[86:87] -; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_26 +; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_25 ; GLOBALNESS1-NEXT: ; %bb.11: ; %bb33.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[2:3], off -; GLOBALNESS1-NEXT: v_readlane_b32 s4, v56, 2 -; GLOBALNESS1-NEXT: v_readlane_b32 s5, v56, 3 +; GLOBALNESS1-NEXT: global_load_dwordx2 v[0:1], v[44:45], off +; GLOBALNESS1-NEXT: v_readlane_b32 s4, v59, 2 +; GLOBALNESS1-NEXT: v_readlane_b32 s5, v59, 3 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[4:5] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_13 ; GLOBALNESS1-NEXT: ; %bb.12: ; %bb39.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[42:43], off +; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off ; GLOBALNESS1-NEXT: .LBB1_13: ; %bb44.lr.ph.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46 +; GLOBALNESS1-NEXT: v_cmp_ne_u32_e32 vcc, 0, v58 ; GLOBALNESS1-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc ; GLOBALNESS1-NEXT: s_waitcnt vmcnt(0) ; GLOBALNESS1-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1] @@ -209,7 +209,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: .LBB1_15: ; %bb63.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[68:69] -; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_25 +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_24 ; GLOBALNESS1-NEXT: .LBB1_16: ; %bb44.i ; GLOBALNESS1-NEXT: ; Parent Loop BB1_4 Depth=1 ; GLOBALNESS1-NEXT: ; => This Inner Loop Header: Depth=2 @@ -225,8 +225,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_21 ; GLOBALNESS1-NEXT: ; %bb.19: ; %bb3.i.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS1-NEXT: v_readlane_b32 s4, v56, 0 -; GLOBALNESS1-NEXT: v_readlane_b32 s5, v56, 1 +; GLOBALNESS1-NEXT: v_readlane_b32 s4, v59, 0 +; GLOBALNESS1-NEXT: v_readlane_b32 s5, v59, 1 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[4:5] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_21 ; GLOBALNESS1-NEXT: ; %bb.20: ; %bb6.i.i @@ -254,7 +254,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[54:55] -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[46:47], 0, 0 ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS1-NEXT: s_mov_b64 s[8:9], s[70:71] @@ -263,55 +262,49 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_mov_b32 s13, s83 ; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[46:47], v[44:45], off +; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[56:57], off ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[54:55] ; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[96:97] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_14 ; GLOBALNESS1-NEXT: ; %bb.23: ; %bb62.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_16 Depth=2 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[46:47], v[42:43], off +; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off ; GLOBALNESS1-NEXT: s_branch .LBB1_14 -; GLOBALNESS1-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: s_mov_b64 s[6:7], -1 -; GLOBALNESS1-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GLOBALNESS1-NEXT: s_branch .LBB1_3 -; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow23 +; GLOBALNESS1-NEXT: .LBB1_24: ; %Flow23 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: s_load_dwordx4 s[4:7], s[38:39], 0x0 -; GLOBALNESS1-NEXT: v_readlane_b32 s70, v56, 8 +; GLOBALNESS1-NEXT: v_readlane_b32 s70, v59, 8 ; GLOBALNESS1-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS1-NEXT: v_readlane_b32 s71, v56, 9 +; GLOBALNESS1-NEXT: v_readlane_b32 s71, v59, 9 ; GLOBALNESS1-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS1-NEXT: s_mov_b32 s55, s7 -; GLOBALNESS1-NEXT: .LBB1_26: ; %Flow24 +; GLOBALNESS1-NEXT: .LBB1_25: ; %Flow24 ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: s_or_b64 exec, exec, s[52:53] ; GLOBALNESS1-NEXT: s_and_saveexec_b64 s[4:5], s[86:87] ; GLOBALNESS1-NEXT: s_cbranch_execz .LBB1_2 -; GLOBALNESS1-NEXT: ; %bb.27: ; %bb67.i +; GLOBALNESS1-NEXT: ; %bb.26: ; %bb67.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS1-NEXT: v_readlane_b32 s6, v56, 4 -; GLOBALNESS1-NEXT: v_readlane_b32 s7, v56, 5 +; GLOBALNESS1-NEXT: v_readlane_b32 s6, v59, 4 +; GLOBALNESS1-NEXT: v_readlane_b32 s7, v59, 5 ; GLOBALNESS1-NEXT: s_and_b64 vcc, exec, s[6:7] ; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_1 -; GLOBALNESS1-NEXT: ; %bb.28: ; %bb69.i +; GLOBALNESS1-NEXT: ; %bb.27: ; %bb69.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[42:43], off +; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off ; GLOBALNESS1-NEXT: s_branch .LBB1_1 -; GLOBALNESS1-NEXT: .LBB1_29: ; %bb73.i +; GLOBALNESS1-NEXT: .LBB1_28: ; %bb73.i ; GLOBALNESS1-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS1-NEXT: v_pk_mov_b32 v[2:3], 0, 0 -; GLOBALNESS1-NEXT: global_store_dwordx2 v[2:3], v[42:43], off +; GLOBALNESS1-NEXT: global_store_dwordx2 v[44:45], v[42:43], off ; GLOBALNESS1-NEXT: s_branch .LBB1_2 -; GLOBALNESS1-NEXT: .LBB1_30: ; %loop.exit.guard +; GLOBALNESS1-NEXT: .LBB1_29: ; %loop.exit.guard ; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], -1 -; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_32 -; GLOBALNESS1-NEXT: ; %bb.31: ; %bb7.i.i +; GLOBALNESS1-NEXT: s_cbranch_vccz .LBB1_31 +; GLOBALNESS1-NEXT: ; %bb.30: ; %bb7.i.i ; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 ; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17] @@ -326,10 +319,10 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GLOBALNESS1-NEXT: s_mov_b64 s[4:5], 0 -; GLOBALNESS1-NEXT: .LBB1_32: ; %Flow +; GLOBALNESS1-NEXT: .LBB1_31: ; %Flow ; GLOBALNESS1-NEXT: s_andn2_b64 vcc, exec, s[4:5] -; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_34 -; GLOBALNESS1-NEXT: ; %bb.33: ; %bb11.i.i +; GLOBALNESS1-NEXT: s_cbranch_vccnz .LBB1_33 +; GLOBALNESS1-NEXT: ; %bb.32: ; %bb11.i.i ; GLOBALNESS1-NEXT: s_add_u32 s8, s38, 40 ; GLOBALNESS1-NEXT: s_addc_u32 s9, s39, 0 ; GLOBALNESS1-NEXT: s_getpc_b64 s[16:17] @@ -343,17 +336,16 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS1-NEXT: s_mov_b32 s14, s82 ; GLOBALNESS1-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS1-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GLOBALNESS1-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock +; GLOBALNESS1-NEXT: .LBB1_33: ; %UnifiedUnreachableBlock ; ; GLOBALNESS0-LABEL: kernel: ; GLOBALNESS0: ; %bb.0: ; %bb ; GLOBALNESS0-NEXT: s_mov_b64 s[36:37], s[6:7] ; GLOBALNESS0-NEXT: s_load_dwordx4 s[52:55], s[8:9], 0x0 ; GLOBALNESS0-NEXT: s_load_dword s6, s[8:9], 0x14 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v42, 0 -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 -; GLOBALNESS0-NEXT: global_store_dword v[0:1], v42, off +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[44:45], 0, 0 +; GLOBALNESS0-NEXT: global_store_dword v[44:45], v42, off ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS0-NEXT: global_load_dword v2, v42, s[52:53] ; GLOBALNESS0-NEXT: s_mov_b64 s[48:49], s[4:5] @@ -362,6 +354,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_add_u32 flat_scratch_lo, s12, s17 ; GLOBALNESS0-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 ; GLOBALNESS0-NEXT: s_add_u32 s0, s0, s17 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v41, v0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS0-NEXT: s_addc_u32 s1, s1, 0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x40994400 @@ -386,19 +379,21 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_xor_b64 s[4:5], s[4:5], -1 ; GLOBALNESS0-NEXT: s_mov_b64 s[38:39], s[8:9] ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[8:9], 1, v1 -; GLOBALNESS0-NEXT: ; implicit-def: $vgpr56 : SGPR spill to VGPR lane +; GLOBALNESS0-NEXT: ; implicit-def: $vgpr59 : SGPR spill to VGPR lane ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[66:67], 1, v0 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] -; GLOBALNESS0-NEXT: v_writelane_b32 v56, s8, 0 +; GLOBALNESS0-NEXT: v_writelane_b32 v59, s8, 0 ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[68:69], 1, v0 -; GLOBALNESS0-NEXT: v_writelane_b32 v56, s9, 1 +; GLOBALNESS0-NEXT: v_writelane_b32 v59, s9, 1 ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[84:85], 1, v3 +; GLOBALNESS0-NEXT: v_mov_b32_e32 v46, 0x80 ; GLOBALNESS0-NEXT: s_mov_b32 s70, s16 ; GLOBALNESS0-NEXT: s_mov_b32 s71, s15 ; GLOBALNESS0-NEXT: s_mov_b32 s82, s14 ; GLOBALNESS0-NEXT: s_mov_b64 s[34:35], s[10:11] +; GLOBALNESS0-NEXT: v_mov_b32_e32 v47, 0 ; GLOBALNESS0-NEXT: s_mov_b32 s32, 0 -; GLOBALNESS0-NEXT: ; implicit-def: $vgpr44_vgpr45 +; GLOBALNESS0-NEXT: ; implicit-def: $vgpr56_vgpr57 ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) ; GLOBALNESS0-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc @@ -407,45 +402,43 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v0 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc -; GLOBALNESS0-NEXT: v_writelane_b32 v56, s4, 2 +; GLOBALNESS0-NEXT: v_writelane_b32 v59, s4, 2 ; GLOBALNESS0-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GLOBALNESS0-NEXT: v_writelane_b32 v56, s5, 3 +; GLOBALNESS0-NEXT: v_writelane_b32 v59, s5, 3 ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v3 ; GLOBALNESS0-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc -; GLOBALNESS0-NEXT: v_writelane_b32 v56, s4, 4 -; GLOBALNESS0-NEXT: v_writelane_b32 v56, s5, 5 +; GLOBALNESS0-NEXT: v_writelane_b32 v59, s4, 4 +; GLOBALNESS0-NEXT: v_writelane_b32 v59, s5, 5 ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[4:5], 1, v2 -; GLOBALNESS0-NEXT: v_writelane_b32 v56, s4, 6 -; GLOBALNESS0-NEXT: v_writelane_b32 v56, s5, 7 +; GLOBALNESS0-NEXT: v_writelane_b32 v59, s4, 6 +; GLOBALNESS0-NEXT: v_writelane_b32 v59, s5, 7 ; GLOBALNESS0-NEXT: v_cmp_ne_u32_e64 s[80:81], 1, v1 -; GLOBALNESS0-NEXT: v_writelane_b32 v56, s84, 8 -; GLOBALNESS0-NEXT: v_writelane_b32 v56, s85, 9 +; GLOBALNESS0-NEXT: v_writelane_b32 v59, s84, 8 +; GLOBALNESS0-NEXT: v_writelane_b32 v59, s85, 9 ; GLOBALNESS0-NEXT: s_branch .LBB1_4 ; GLOBALNESS0-NEXT: .LBB1_1: ; %bb70.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_readlane_b32 s6, v56, 6 -; GLOBALNESS0-NEXT: v_readlane_b32 s7, v56, 7 +; GLOBALNESS0-NEXT: v_readlane_b32 s6, v59, 6 +; GLOBALNESS0-NEXT: v_readlane_b32 s7, v59, 7 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] -; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_29 +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_28 ; GLOBALNESS0-NEXT: .LBB1_2: ; %Flow15 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[4:5] -; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], 0 +; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], 0 ; GLOBALNESS0-NEXT: ; implicit-def: $sgpr4_sgpr5 ; GLOBALNESS0-NEXT: .LBB1_3: ; %Flow28 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[44:45], v[0:1], v[0:1] op_sel:[0,1] -; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_30 +; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[8:9] +; GLOBALNESS0-NEXT: v_pk_mov_b32 v[56:57], v[0:1], v[0:1] op_sel:[0,1] +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_29 ; GLOBALNESS0-NEXT: .LBB1_4: ; %bb5 ; GLOBALNESS0-NEXT: ; =>This Loop Header: Depth=1 ; GLOBALNESS0-NEXT: ; Child Loop BB1_16 Depth 2 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0x80 -; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0 -; GLOBALNESS0-NEXT: flat_load_dword v40, v[0:1] +; GLOBALNESS0-NEXT: flat_load_dword v40, v[46:47] ; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 ; GLOBALNESS0-NEXT: buffer_store_dword v42, off, s[0:3], 0 -; GLOBALNESS0-NEXT: flat_load_dword v46, v[0:1] +; GLOBALNESS0-NEXT: flat_load_dword v58, v[46:47] ; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[4:5] ; GLOBALNESS0-NEXT: s_add_u32 s4, s4, wobble@gotpcrel32@lo+4 @@ -485,34 +478,34 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GLOBALNESS0-NEXT: .LBB1_9: ; %Flow25 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 +; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], -1 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] -; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_24 +; GLOBALNESS0-NEXT: ; implicit-def: $vgpr0_vgpr1 +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_3 ; GLOBALNESS0-NEXT: ; %bb.10: ; %baz.exit.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 -; GLOBALNESS0-NEXT: flat_load_dword v0, v[2:3] +; GLOBALNESS0-NEXT: flat_load_dword v0, v[44:45] ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GLOBALNESS0-NEXT: v_cmp_gt_i32_e64 s[86:87], 0, v0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v0, 0 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v1, 0x3ff00000 ; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[52:53], s[86:87] -; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_26 +; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_25 ; GLOBALNESS0-NEXT: ; %bb.11: ; %bb33.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[2:3], off -; GLOBALNESS0-NEXT: v_readlane_b32 s4, v56, 2 -; GLOBALNESS0-NEXT: v_readlane_b32 s5, v56, 3 +; GLOBALNESS0-NEXT: global_load_dwordx2 v[0:1], v[44:45], off +; GLOBALNESS0-NEXT: v_readlane_b32 s4, v59, 2 +; GLOBALNESS0-NEXT: v_readlane_b32 s5, v59, 3 ; GLOBALNESS0-NEXT: s_mov_b32 s83, s55 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[4:5] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_13 ; GLOBALNESS0-NEXT: ; %bb.12: ; %bb39.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[42:43], off +; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off ; GLOBALNESS0-NEXT: .LBB1_13: ; %bb44.lr.ph.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v46 +; GLOBALNESS0-NEXT: v_cmp_ne_u32_e32 vcc, 0, v58 ; GLOBALNESS0-NEXT: v_cndmask_b32_e32 v2, 0, v40, vcc ; GLOBALNESS0-NEXT: s_waitcnt vmcnt(0) ; GLOBALNESS0-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1] @@ -526,7 +519,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: .LBB1_15: ; %bb63.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[68:69] -; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_25 +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_24 ; GLOBALNESS0-NEXT: .LBB1_16: ; %bb44.i ; GLOBALNESS0-NEXT: ; Parent Loop BB1_4 Depth=1 ; GLOBALNESS0-NEXT: ; => This Inner Loop Header: Depth=2 @@ -542,8 +535,8 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_21 ; GLOBALNESS0-NEXT: ; %bb.19: ; %bb3.i.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 -; GLOBALNESS0-NEXT: v_readlane_b32 s4, v56, 0 -; GLOBALNESS0-NEXT: v_readlane_b32 s5, v56, 1 +; GLOBALNESS0-NEXT: v_readlane_b32 s4, v59, 0 +; GLOBALNESS0-NEXT: v_readlane_b32 s5, v59, 1 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[4:5] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_21 ; GLOBALNESS0-NEXT: ; %bb.20: ; %bb6.i.i @@ -571,7 +564,6 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_waitcnt lgkmcnt(0) ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[54:55] -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[46:47], 0, 0 ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], s[48:49] ; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], s[36:37] ; GLOBALNESS0-NEXT: s_mov_b64 s[8:9], s[84:85] @@ -580,53 +572,47 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_mov_b32 s13, s71 ; GLOBALNESS0-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[46:47], v[44:45], off +; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[56:57], off ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[54:55] ; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[96:97] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_14 ; GLOBALNESS0-NEXT: ; %bb.23: ; %bb62.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_16 Depth=2 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[46:47], v[42:43], off +; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off ; GLOBALNESS0-NEXT: s_branch .LBB1_14 -; GLOBALNESS0-NEXT: .LBB1_24: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: s_mov_b64 s[6:7], -1 -; GLOBALNESS0-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GLOBALNESS0-NEXT: s_branch .LBB1_3 -; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow23 +; GLOBALNESS0-NEXT: .LBB1_24: ; %Flow23 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_readlane_b32 s84, v56, 8 +; GLOBALNESS0-NEXT: v_readlane_b32 s84, v59, 8 ; GLOBALNESS0-NEXT: v_pk_mov_b32 v[0:1], 0, 0 ; GLOBALNESS0-NEXT: s_mov_b32 s55, s83 -; GLOBALNESS0-NEXT: v_readlane_b32 s85, v56, 9 -; GLOBALNESS0-NEXT: .LBB1_26: ; %Flow24 +; GLOBALNESS0-NEXT: v_readlane_b32 s85, v59, 9 +; GLOBALNESS0-NEXT: .LBB1_25: ; %Flow24 ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: s_or_b64 exec, exec, s[52:53] ; GLOBALNESS0-NEXT: s_and_saveexec_b64 s[4:5], s[86:87] ; GLOBALNESS0-NEXT: s_cbranch_execz .LBB1_2 -; GLOBALNESS0-NEXT: ; %bb.27: ; %bb67.i +; GLOBALNESS0-NEXT: ; %bb.26: ; %bb67.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 -; GLOBALNESS0-NEXT: v_readlane_b32 s6, v56, 4 -; GLOBALNESS0-NEXT: v_readlane_b32 s7, v56, 5 +; GLOBALNESS0-NEXT: v_readlane_b32 s6, v59, 4 +; GLOBALNESS0-NEXT: v_readlane_b32 s7, v59, 5 ; GLOBALNESS0-NEXT: s_and_b64 vcc, exec, s[6:7] ; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_1 -; GLOBALNESS0-NEXT: ; %bb.28: ; %bb69.i +; GLOBALNESS0-NEXT: ; %bb.27: ; %bb69.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[42:43], off +; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off ; GLOBALNESS0-NEXT: s_branch .LBB1_1 -; GLOBALNESS0-NEXT: .LBB1_29: ; %bb73.i +; GLOBALNESS0-NEXT: .LBB1_28: ; %bb73.i ; GLOBALNESS0-NEXT: ; in Loop: Header=BB1_4 Depth=1 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v43, v42 -; GLOBALNESS0-NEXT: v_pk_mov_b32 v[2:3], 0, 0 -; GLOBALNESS0-NEXT: global_store_dwordx2 v[2:3], v[42:43], off +; GLOBALNESS0-NEXT: global_store_dwordx2 v[44:45], v[42:43], off ; GLOBALNESS0-NEXT: s_branch .LBB1_2 -; GLOBALNESS0-NEXT: .LBB1_30: ; %loop.exit.guard +; GLOBALNESS0-NEXT: .LBB1_29: ; %loop.exit.guard ; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], -1 -; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_32 -; GLOBALNESS0-NEXT: ; %bb.31: ; %bb7.i.i +; GLOBALNESS0-NEXT: s_cbranch_vccz .LBB1_31 +; GLOBALNESS0-NEXT: ; %bb.30: ; %bb7.i.i ; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 ; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17] @@ -641,10 +627,10 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GLOBALNESS0-NEXT: s_mov_b64 s[4:5], 0 -; GLOBALNESS0-NEXT: .LBB1_32: ; %Flow +; GLOBALNESS0-NEXT: .LBB1_31: ; %Flow ; GLOBALNESS0-NEXT: s_andn2_b64 vcc, exec, s[4:5] -; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_34 -; GLOBALNESS0-NEXT: ; %bb.33: ; %bb11.i.i +; GLOBALNESS0-NEXT: s_cbranch_vccnz .LBB1_33 +; GLOBALNESS0-NEXT: ; %bb.32: ; %bb11.i.i ; GLOBALNESS0-NEXT: s_add_u32 s8, s38, 40 ; GLOBALNESS0-NEXT: s_addc_u32 s9, s39, 0 ; GLOBALNESS0-NEXT: s_getpc_b64 s[16:17] @@ -658,7 +644,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %arg1.global, i1 %tmp3.i.i, i ; GLOBALNESS0-NEXT: s_mov_b32 s14, s70 ; GLOBALNESS0-NEXT: v_mov_b32_e32 v31, v41 ; GLOBALNESS0-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GLOBALNESS0-NEXT: .LBB1_34: ; %UnifiedUnreachableBlock +; GLOBALNESS0-NEXT: .LBB1_33: ; %UnifiedUnreachableBlock bb: store i32 0, ptr addrspace(1) null, align 4 %tmp4 = load i32, ptr addrspace(1) %arg1.global, align 4