diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll index 4ca00f2daf97a..4b5a7c207055a 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll @@ -12,7 +12,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: ;;#ASMSTART @@ -20,16 +26,28 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 -; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_1-NEXT: v_readfirstlane_b32 s55, v0 ; GFX10_1-NEXT: ;;#ASMSTART -; GFX10_1-NEXT: ; use s59, scc +; GFX10_1-NEXT: ; use s55, scc ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: ;;#ASMSTART @@ -37,17 +55,27 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 -; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_3-NEXT: v_readfirstlane_b32 s55, v0 ; GFX10_3-NEXT: ;;#ASMSTART -; GFX10_3-NEXT: ; use s59, scc +; GFX10_3-NEXT: ; use s55, scc ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 +; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_writelane_b32 v1, s55, 0 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: s_addc_u32 s0, s32, 0x4040 @@ -57,10 +85,16 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 ; GFX11-NEXT: s_bitset0_b32 s0, 0 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_mov_b32 s59, s0 +; GFX11-NEXT: s_mov_b32 s55, s0 ; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use s59, scc +; GFX11-NEXT: ; use s55, scc ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s55, v1, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 +; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: @@ -70,7 +104,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo +; GFX12-NEXT: v_writelane_b32 v1, s55, 0 ; GFX12-NEXT: s_add_co_ci_u32 s0, s32, 0x4000 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_wait_alu 0xfffe @@ -80,34 +120,54 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 s59, s0 +; GFX12-NEXT: s_mov_b32 s55, s0 ; GFX12-NEXT: ;;#ASMSTART -; GFX12-NEXT: ; use s59, scc +; GFX12-NEXT: ; use s55, scc ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s55, v1, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 +; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX8-NEXT: s_movk_i32 s59, 0x4040 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 +; GFX8-NEXT: s_movk_i32 s55, 0x4040 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, s55, v0 +; GFX8-NEXT: v_readfirstlane_b32 s55, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_readfirstlane_b32 s59, v0 ; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ; use s59, scc +; GFX8-NEXT: ; use s55, scc ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s55, v1, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART @@ -115,34 +175,52 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc() #0 { ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 +; GFX900-NEXT: v_writelane_b32 v1, s55, 0 +; GFX900-NEXT: v_readfirstlane_b32 s55, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_readfirstlane_b32 s59, v0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s59, scc +; GFX900-NEXT: ; use s55, scc ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s55, v1, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s32, 0x4040 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 +; GFX942-NEXT: v_writelane_b32 v1, s55, 0 +; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s59, s0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; use s59, scc +; GFX942-NEXT: ; use s55, scc ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s55, v1, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca i32, align 4, addrspace(5) call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) - call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1, i32 0) + call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1, i32 0) ret void } @@ -152,36 +230,65 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 { ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_dead_scc: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 -; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5 -; GFX10_1-NEXT: s_addk_i32 s59, 0x4040 +; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5 +; GFX10_1-NEXT: s_addk_i32 s55, 0x4040 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: ;;#ASMSTART -; GFX10_1-NEXT: ; use s59 +; GFX10_1-NEXT: ; use s55 ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_dead_scc: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 -; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5 -; GFX10_3-NEXT: s_addk_i32 s59, 0x4040 +; GFX10_3-NEXT: s_lshr_b32 s55, s32, 5 +; GFX10_3-NEXT: s_addk_i32 s55, 0x4040 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: ;;#ASMSTART -; GFX10_3-NEXT: ; use s59 +; GFX10_3-NEXT: ; use s55 ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80880 +; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_dead_scc: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 +; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v1, s55, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v0, s0 @@ -189,10 +296,16 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 { ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use alloca0 v0 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_mov_b32 s59, s0 +; GFX11-NEXT: s_mov_b32 s55, s0 ; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use s59 +; GFX11-NEXT: ; use s55 ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s55, v1, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4044 +; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_dead_scc: @@ -202,67 +315,110 @@ define void @scalar_mov_materializes_frame_index_dead_scc() #0 { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:16388 ; 4-byte Folded Spill +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: v_writelane_b32 v1, s55, 0 ; GFX12-NEXT: s_add_co_i32 s0, s32, 0x4000 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 s55, s0 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 s59, s0 ; GFX12-NEXT: ;;#ASMSTART -; GFX12-NEXT: ; use s59 +; GFX12-NEXT: ; use s55 ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s55, v1, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:16388 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_dead_scc: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v1, s55, 0 +; GFX8-NEXT: s_lshr_b32 s55, s32, 6 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX8-NEXT: s_lshr_b32 s59, s32, 6 +; GFX8-NEXT: s_addk_i32 s55, 0x4040 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: s_addk_i32 s59, 0x4040 ; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ; use s59 +; GFX8-NEXT: ; use s55 ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s55, v1, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_dead_scc: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v1, s55, 0 +; GFX900-NEXT: s_lshr_b32 s55, s32, 6 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX900-NEXT: s_lshr_b32 s59, s32, 6 +; GFX900-NEXT: s_addk_i32 s55, 0x4040 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: s_addk_i32 s59, 0x4040 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s59 +; GFX900-NEXT: ; use s55 ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s55, v1, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101100 +; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_dead_scc: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: s_add_i32 s0, s32, 0x4040 +; GFX942-NEXT: v_writelane_b32 v1, s55, 0 +; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s59, s0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; use s59 +; GFX942-NEXT: ; use s55 ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s55, v1, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4044 +; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca i32, align 4, addrspace(5) call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) - call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca1) + call void asm sideeffect "; use $0", "{s55}"(ptr addrspace(5) %alloca1) ret void } @@ -272,8 +428,14 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_1-NEXT: s_mov_b32 s5, s33 ; GFX10_1-NEXT: s_mov_b32 s33, s32 -; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880 +; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 +; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: s_mov_b32 s32, s33 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 @@ -281,12 +443,19 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10_1-NEXT: s_mov_b32 s33, s5 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 -; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_1-NEXT: v_readfirstlane_b32 s55, v0 ; GFX10_1-NEXT: ;;#ASMSTART -; GFX10_1-NEXT: ; use s59, scc +; GFX10_1-NEXT: ; use s55, scc ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80880 +; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_mov_b32 s33, s5 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: @@ -294,8 +463,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_3-NEXT: s_mov_b32 s5, s33 ; GFX10_3-NEXT: s_mov_b32 s33, s32 -; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880 +; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 +; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: s_mov_b32 s32, s33 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 @@ -303,12 +477,18 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10_3-NEXT: s_mov_b32 s33, s5 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 0x4040, v0 -; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_3-NEXT: v_readfirstlane_b32 s55, v0 ; GFX10_3-NEXT: ;;#ASMSTART -; GFX10_3-NEXT: ; use s59, scc +; GFX10_3-NEXT: ; use s55, scc ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80880 +; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_mov_b32 s33, s5 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: @@ -316,9 +496,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s1, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s2, s33, 0x4044 +; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_addk_i32 s32, 0x4080 ; GFX11-NEXT: s_add_i32 s0, s33, 64 -; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: v_writelane_b32 v1, s55, 0 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: s_addc_u32 s0, s33, 0x4040 @@ -327,11 +511,18 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 ; GFX11-NEXT: s_bitset0_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s33, s1 -; GFX11-NEXT: s_mov_b32 s59, s0 +; GFX11-NEXT: s_mov_b32 s32, s33 +; GFX11-NEXT: s_mov_b32 s55, s0 ; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use s59, scc +; GFX11-NEXT: ; use s55, scc ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s55, v1, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s2, s33, 0x4044 +; GFX11-NEXT: scratch_load_b32 v1, off, s2 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s1 +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: @@ -343,9 +534,13 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_mov_b32 s1, s33 ; GFX12-NEXT: s_mov_b32 s33, s32 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v1, s33 offset:16388 ; 4-byte Folded Spill +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo -; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_writelane_b32 v1, s55, 0 ; GFX12-NEXT: s_add_co_ci_u32 s0, s33, 0x4000 ; GFX12-NEXT: v_mov_b32_e32 v0, s33 ; GFX12-NEXT: s_wait_alu 0xfffe @@ -355,12 +550,18 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 s59, s0 +; GFX12-NEXT: s_mov_b32 s55, s0 ; GFX12-NEXT: ;;#ASMSTART -; GFX12-NEXT: ; use s59, scc +; GFX12-NEXT: ; use s55, scc ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s55, v1, 0 ; GFX12-NEXT: s_mov_b32 s32, s33 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v1, off, s33 offset:16388 ; 4-byte Folded Reload +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_mov_b32 s33, s1 +; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_setpc_b64 s[30:31] ; @@ -369,22 +570,33 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 s6, s33 ; GFX8-NEXT: s_mov_b32 s33, s32 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s7, s33, 0x101100 +; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 +; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX8-NEXT: s_movk_i32 s59, 0x4040 +; GFX8-NEXT: s_movk_i32 s55, 0x4040 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, s55, v0 ; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 +; GFX8-NEXT: v_readfirstlane_b32 s55, v0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_readfirstlane_b32 s59, v0 ; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ; use s59, scc +; GFX8-NEXT: ; use s55, scc ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s55, v1, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s7, s33, 0x101100 +; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_mov_b32 s33, s6 +; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: @@ -392,21 +604,32 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s6, s33 ; GFX900-NEXT: s_mov_b32 s33, s32 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s7, s33, 0x101100 +; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s7 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 ; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 +; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX900-NEXT: v_writelane_b32 v1, s55, 0 +; GFX900-NEXT: v_readfirstlane_b32 s55, v0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_readfirstlane_b32 s59, v0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s59, scc +; GFX900-NEXT: ; use s55, scc ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s55, v1, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s7, s33, 0x101100 +; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s7 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_mov_b32 s33, s6 +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_fp: @@ -414,6 +637,10 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_mov_b32 s2, s33 ; GFX942-NEXT: s_mov_b32 s33, s32 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s3, s33, 0x4044 +; GFX942-NEXT: scratch_store_dword off, v1, s3 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_addk_i32 s32, 0x4080 ; GFX942-NEXT: s_add_i32 s0, s33, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 @@ -421,20 +648,27 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_fp() #1 { ; GFX942-NEXT: s_addc_u32 s0, s33, 0x4040 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 +; GFX942-NEXT: v_writelane_b32 v1, s55, 0 +; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; use alloca0 v0 ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_mov_b32 s59, s0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; use s59, scc +; GFX942-NEXT: ; use s55, scc ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s55, v1, 0 ; GFX942-NEXT: s_mov_b32 s32, s33 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s3, s33, 0x4044 +; GFX942-NEXT: scratch_load_dword v1, off, s3 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_mov_b32 s33, s2 +; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca i32, align 4, addrspace(5) call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) - call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1, i32 0) + call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1, i32 0) ret void } @@ -442,39 +676,75 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s32 +; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo -; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 -; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 +; GFX10_1-NEXT: v_readfirstlane_b32 s55, v1 ; GFX10_1-NEXT: ;;#ASMSTART -; GFX10_1-NEXT: ; use s59, scc +; GFX10_1-NEXT: ; use s55, scc ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s32 +; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo -; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 -; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 +; GFX10_3-NEXT: v_readfirstlane_b32 s55, v1 ; GFX10_3-NEXT: ;;#ASMSTART -; GFX10_3-NEXT: ; use s59, scc +; GFX10_3-NEXT: ; use s55, scc ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 +; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo +; GFX11-NEXT: v_writelane_b32 v0, s55, 0 ; GFX11-NEXT: s_addc_u32 s0, s32, 64 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 ; GFX11-NEXT: s_bitset0_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s59, s0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s55, s0 ; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use s59, scc +; GFX11-NEXT: ; use s55, scc ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s55, v0, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 +; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: @@ -484,53 +754,97 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset() ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: v_writelane_b32 v0, s55, 0 +; GFX12-NEXT: s_mov_b32 s55, s32 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo -; GFX12-NEXT: s_mov_b32 s59, s32 ; GFX12-NEXT: ;;#ASMSTART -; GFX12-NEXT: ; use s59, scc +; GFX12-NEXT: ; use s55, scc ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_readlane_b32 s55, v0, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX8-NEXT: s_mov_b32 s59, 64 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v0, s55, 0 +; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s32 +; GFX8-NEXT: s_mov_b32 s55, 64 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, s55, v1 +; GFX8-NEXT: v_readfirstlane_b32 s55, v1 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_readfirstlane_b32 s59, v0 ; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ; use s59, scc +; GFX8-NEXT: ; use s55, scc ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s55, v0, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s32 +; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 +; GFX900-NEXT: v_writelane_b32 v0, s55, 0 +; GFX900-NEXT: v_readfirstlane_b32 s55, v1 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_readfirstlane_b32 s59, v0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s59, scc +; GFX900-NEXT: ; use s55, scc ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s55, v0, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 +; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s32, 64 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: s_mov_b32 s59, s0 +; GFX942-NEXT: v_writelane_b32 v0, s55, 0 +; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; use s59, scc +; GFX942-NEXT: ; use s55, scc ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s55, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 +; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) - call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0) + call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca0, i32 0) ret void } @@ -538,32 +852,67 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0 ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5 -; GFX10_1-NEXT: s_add_i32 s59, s59, 64 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5 +; GFX10_1-NEXT: s_add_i32 s55, s55, 64 ; GFX10_1-NEXT: ;;#ASMSTART -; GFX10_1-NEXT: ; use s59 +; GFX10_1-NEXT: ; use s55 ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5 -; GFX10_3-NEXT: s_add_i32 s59, s59, 64 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_3-NEXT: s_lshr_b32 s55, s32, 5 +; GFX10_3-NEXT: s_add_i32 s55, s55, 64 ; GFX10_3-NEXT: ;;#ASMSTART -; GFX10_3-NEXT: ; use s59 +; GFX10_3-NEXT: ; use s55 ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x80800 +; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 +; GFX11-NEXT: scratch_store_b32 off, v0, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v0, s55, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_mov_b32 s59, s0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-NEXT: s_mov_b32 s55, s0 ; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use s59 +; GFX11-NEXT: ; use s55 ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s55, v0, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x4040 +; GFX11-NEXT: scratch_load_b32 v0, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: @@ -573,44 +922,88 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset() #0 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_mov_b32 s59, s32 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v0, s32 offset:16384 ; 4-byte Folded Spill +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: v_writelane_b32 v0, s55, 0 +; GFX12-NEXT: s_mov_b32 s55, s32 ; GFX12-NEXT: ;;#ASMSTART -; GFX12-NEXT: ; use s59 +; GFX12-NEXT: ; use s55 ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_readlane_b32 s55, v0, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v0, off, s32 offset:16384 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s59, s32, 6 -; GFX8-NEXT: s_add_i32 s59, s59, 64 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v0, s55, 0 +; GFX8-NEXT: s_lshr_b32 s55, s32, 6 +; GFX8-NEXT: s_add_i32 s55, s55, 64 ; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ; use s59 +; GFX8-NEXT: ; use s55 ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s55, v0, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX900-NEXT: s_lshr_b32 s59, s32, 6 -; GFX900-NEXT: s_add_i32 s59, s59, 64 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v0, s55, 0 +; GFX900-NEXT: s_lshr_b32 s55, s32, 6 +; GFX900-NEXT: s_add_i32 s55, s55, 64 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s59 +; GFX900-NEXT: ; use s55 ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s55, v0, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x101000 +; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 +; GFX942-NEXT: scratch_store_dword off, v0, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_add_i32 s0, s32, 64 -; GFX942-NEXT: s_mov_b32 s59, s0 +; GFX942-NEXT: v_writelane_b32 v0, s55, 0 +; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; use s59 +; GFX942-NEXT: ; use s55 ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s55, v0, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x4040 +; GFX942-NEXT: scratch_load_dword v0, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) - call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0) + call void asm sideeffect "; use $0", "{s55}"(ptr addrspace(5) %alloca0) ret void } @@ -620,16 +1013,29 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_1-NEXT: s_mov_b32 s5, s33 ; GFX10_1-NEXT: s_mov_b32 s33, s32 -; GFX10_1-NEXT: s_add_i32 s32, s32, 0x80800 -; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_lshrrev_b32_e64 v1, 5, s33 +; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: s_mov_b32 s32, s33 -; GFX10_1-NEXT: s_mov_b32 s33, s5 -; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 -; GFX10_1-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_1-NEXT: v_add_nc_u32_e32 v1, 64, v1 +; GFX10_1-NEXT: v_readfirstlane_b32 s55, v1 ; GFX10_1-NEXT: ;;#ASMSTART -; GFX10_1-NEXT: ; use s59, scc +; GFX10_1-NEXT: ; use s55, scc ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_mov_b32 s33, s5 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: @@ -637,16 +1043,27 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_3-NEXT: s_mov_b32 s5, s33 ; GFX10_3-NEXT: s_mov_b32 s33, s32 -; GFX10_3-NEXT: s_add_i32 s32, s32, 0x80800 -; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_lshrrev_b32_e64 v1, 5, s33 +; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: s_mov_b32 s32, s33 -; GFX10_3-NEXT: s_mov_b32 s33, s5 -; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 -; GFX10_3-NEXT: v_readfirstlane_b32 s59, v0 +; GFX10_3-NEXT: v_add_nc_u32_e32 v1, 64, v1 +; GFX10_3-NEXT: v_readfirstlane_b32 s55, v1 ; GFX10_3-NEXT: ;;#ASMSTART -; GFX10_3-NEXT: ; use s59, scc +; GFX10_3-NEXT: ; use s55, scc ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_mov_b32 s33, s5 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: @@ -654,17 +1071,29 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s1, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 -; GFX11-NEXT: s_addk_i32 s32, 0x4040 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 +; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_addk_i32 s32, 0x4080 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo +; GFX11-NEXT: v_writelane_b32 v0, s55, 0 ; GFX11-NEXT: s_addc_u32 s0, s33, 64 ; GFX11-NEXT: s_mov_b32 s32, s33 ; GFX11-NEXT: s_bitcmp1_b32 s0, 0 ; GFX11-NEXT: s_bitset0_b32 s0, 0 -; GFX11-NEXT: s_mov_b32 s33, s1 -; GFX11-NEXT: s_mov_b32 s59, s0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s55, s0 ; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use s59, scc +; GFX11-NEXT: ; use s55, scc ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s55, v0, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 +; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_mov_b32 s33, s1 +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: @@ -676,15 +1105,25 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_mov_b32 s1, s33 ; GFX12-NEXT: s_mov_b32 s33, s32 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: v_writelane_b32 v0, s55, 0 ; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 +; GFX12-NEXT: s_mov_b32 s55, s33 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 s59, s33 ; GFX12-NEXT: ;;#ASMSTART -; GFX12-NEXT: ; use s59, scc +; GFX12-NEXT: ; use s55, scc ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s55, v0, 0 ; GFX12-NEXT: s_mov_b32 s32, s33 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: s_mov_b32 s33, s1 +; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_setpc_b64 s[30:31] ; @@ -693,17 +1132,28 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 s6, s33 ; GFX8-NEXT: s_mov_b32 s33, s32 -; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX8-NEXT: s_mov_b32 s59, 64 -; GFX8-NEXT: s_add_i32 s32, s32, 0x101000 -; GFX8-NEXT: v_add_u32_e32 v0, vcc, s59, v0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v0, s55, 0 +; GFX8-NEXT: v_lshrrev_b32_e64 v1, 6, s33 +; GFX8-NEXT: s_mov_b32 s55, 64 +; GFX8-NEXT: v_add_u32_e32 v1, vcc, s55, v1 +; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX8-NEXT: v_readfirstlane_b32 s55, v1 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_readfirstlane_b32 s59, v0 ; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ; use s59, scc +; GFX8-NEXT: ; use s55, scc ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s55, v0, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s7, s33, 0x101000 +; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_mov_b32 s33, s6 +; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: @@ -711,16 +1161,27 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s6, s33 ; GFX900-NEXT: s_mov_b32 s33, s32 -; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX900-NEXT: s_add_i32 s32, s32, 0x101000 -; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 +; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s7 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_lshrrev_b32_e64 v1, 6, s33 +; GFX900-NEXT: v_add_u32_e32 v1, 64, v1 +; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX900-NEXT: v_writelane_b32 v0, s55, 0 +; GFX900-NEXT: v_readfirstlane_b32 s55, v1 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_readfirstlane_b32 s59, v0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s59, scc +; GFX900-NEXT: ; use s55, scc ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s55, v0, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s7, s33, 0x101000 +; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s7 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_mov_b32 s33, s6 +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp: @@ -728,20 +1189,31 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc_small_offset_fp ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_mov_b32 s2, s33 ; GFX942-NEXT: s_mov_b32 s33, s32 -; GFX942-NEXT: s_addk_i32 s32, 0x4040 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s3, s33, 0x4040 +; GFX942-NEXT: scratch_store_dword off, v0, s3 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_addk_i32 s32, 0x4080 ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: s_addc_u32 s0, s33, 64 ; GFX942-NEXT: s_bitcmp1_b32 s0, 0 ; GFX942-NEXT: s_bitset0_b32 s0, 0 -; GFX942-NEXT: s_mov_b32 s59, s0 +; GFX942-NEXT: v_writelane_b32 v0, s55, 0 +; GFX942-NEXT: s_mov_b32 s55, s0 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; use s59, scc +; GFX942-NEXT: ; use s55, scc ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s55, v0, 0 ; GFX942-NEXT: s_mov_b32 s32, s33 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s3, s33, 0x4040 +; GFX942-NEXT: scratch_load_dword v0, off, s3 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] ; GFX942-NEXT: s_mov_b32 s33, s2 +; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) - call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca0, i32 0) + call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca0, i32 0) ret void } @@ -751,14 +1223,27 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_1-NEXT: s_mov_b32 s4, s33 ; GFX10_1-NEXT: s_mov_b32 s33, s32 -; GFX10_1-NEXT: s_add_i32 s32, s32, 0x80800 -; GFX10_1-NEXT: s_lshr_b32 s59, s33, 5 +; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1 +; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_1-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 +; GFX10_1-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_1-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_1-NEXT: s_lshr_b32 s55, s33, 5 ; GFX10_1-NEXT: s_mov_b32 s32, s33 -; GFX10_1-NEXT: s_add_i32 s59, s59, 64 +; GFX10_1-NEXT: s_add_i32 s55, s55, 64 ; GFX10_1-NEXT: ;;#ASMSTART -; GFX10_1-NEXT: ; use s59 +; GFX10_1-NEXT: ; use s55 ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s55, v0, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s5, -1 +; GFX10_1-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_1-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s5 ; GFX10_1-NEXT: s_mov_b32 s33, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: @@ -766,14 +1251,25 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10_3-NEXT: s_mov_b32 s4, s33 ; GFX10_3-NEXT: s_mov_b32 s33, s32 -; GFX10_3-NEXT: s_add_i32 s32, s32, 0x80800 -; GFX10_3-NEXT: s_lshr_b32 s59, s33, 5 +; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1 +; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_3-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 +; GFX10_3-NEXT: v_writelane_b32 v0, s55, 0 +; GFX10_3-NEXT: s_add_i32 s32, s32, 0x81000 +; GFX10_3-NEXT: s_lshr_b32 s55, s33, 5 ; GFX10_3-NEXT: s_mov_b32 s32, s33 -; GFX10_3-NEXT: s_add_i32 s59, s59, 64 +; GFX10_3-NEXT: s_add_i32 s55, s55, 64 ; GFX10_3-NEXT: ;;#ASMSTART -; GFX10_3-NEXT: ; use s59 +; GFX10_3-NEXT: ; use s55 ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s55, v0, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s5, -1 +; GFX10_3-NEXT: s_add_i32 s6, s33, 0x80800 +; GFX10_3-NEXT: buffer_load_dword v0, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s5 ; GFX10_3-NEXT: s_mov_b32 s33, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: @@ -781,14 +1277,25 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s33 ; GFX11-NEXT: s_mov_b32 s33, s32 -; GFX11-NEXT: s_addk_i32 s32, 0x4040 +; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 +; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 +; GFX11-NEXT: scratch_store_b32 off, v0, s2 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: v_writelane_b32 v0, s55, 0 +; GFX11-NEXT: s_addk_i32 s32, 0x4080 ; GFX11-NEXT: s_add_i32 s1, s33, 64 ; GFX11-NEXT: s_mov_b32 s32, s33 -; GFX11-NEXT: s_mov_b32 s59, s1 +; GFX11-NEXT: s_mov_b32 s55, s1 ; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use s59 +; GFX11-NEXT: ; use s55 ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s55, v0, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 +; GFX11-NEXT: s_add_i32 s2, s33, 0x4040 +; GFX11-NEXT: scratch_load_b32 v0, off, s2 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_mov_b32 s33, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: @@ -800,14 +1307,24 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_mov_b32 s0, s33 ; GFX12-NEXT: s_mov_b32 s33, s32 -; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 +; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 +; GFX12-NEXT: scratch_store_b32 off, v0, s33 offset:16384 ; 4-byte Folded Spill ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 s59, s33 +; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: v_writelane_b32 v0, s55, 0 +; GFX12-NEXT: s_addk_co_i32 s32, 0x4040 +; GFX12-NEXT: s_mov_b32 s55, s33 ; GFX12-NEXT: ;;#ASMSTART -; GFX12-NEXT: ; use s59 +; GFX12-NEXT: ; use s55 ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_mov_b32 s32, s33 +; GFX12-NEXT: v_readlane_b32 s55, v0, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 +; GFX12-NEXT: scratch_load_b32 v0, off, s33 offset:16384 ; 4-byte Folded Reload +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s1 ; GFX12-NEXT: s_mov_b32 s33, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_setpc_b64 s[30:31] ; @@ -816,14 +1333,25 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 s4, s33 ; GFX8-NEXT: s_mov_b32 s33, s32 -; GFX8-NEXT: s_add_i32 s32, s32, 0x101000 -; GFX8-NEXT: s_lshr_b32 s59, s33, 6 -; GFX8-NEXT: s_add_i32 s59, s59, 64 +; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; GFX8-NEXT: s_add_i32 s5, s33, 0x101000 +; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[6:7] +; GFX8-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX8-NEXT: v_writelane_b32 v0, s55, 0 +; GFX8-NEXT: s_lshr_b32 s55, s33, 6 +; GFX8-NEXT: s_add_i32 s55, s55, 64 ; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ; use s59 +; GFX8-NEXT: ; use s55 ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s55, v0, 0 ; GFX8-NEXT: s_mov_b32 s32, s33 +; GFX8-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; GFX8-NEXT: s_add_i32 s5, s33, 0x101000 +; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[6:7] ; GFX8-NEXT: s_mov_b32 s33, s4 +; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: @@ -831,14 +1359,25 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_mov_b32 s4, s33 ; GFX900-NEXT: s_mov_b32 s33, s32 -; GFX900-NEXT: s_add_i32 s32, s32, 0x101000 -; GFX900-NEXT: s_lshr_b32 s59, s33, 6 -; GFX900-NEXT: s_add_i32 s59, s59, 64 +; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; GFX900-NEXT: s_add_i32 s5, s33, 0x101000 +; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[6:7] +; GFX900-NEXT: s_add_i32 s32, s32, 0x102000 +; GFX900-NEXT: v_writelane_b32 v0, s55, 0 +; GFX900-NEXT: s_lshr_b32 s55, s33, 6 +; GFX900-NEXT: s_add_i32 s55, s55, 64 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s59 +; GFX900-NEXT: ; use s55 ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s55, v0, 0 ; GFX900-NEXT: s_mov_b32 s32, s33 +; GFX900-NEXT: s_xor_saveexec_b64 s[6:7], -1 +; GFX900-NEXT: s_add_i32 s5, s33, 0x101000 +; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[6:7] ; GFX900-NEXT: s_mov_b32 s33, s4 +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_available_scc_small_offset_fp: @@ -846,17 +1385,28 @@ define void @scalar_mov_materializes_frame_index_available_scc_small_offset_fp() ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX942-NEXT: s_mov_b32 s0, s33 ; GFX942-NEXT: s_mov_b32 s33, s32 -; GFX942-NEXT: s_addk_i32 s32, 0x4040 +; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 +; GFX942-NEXT: s_add_i32 s1, s33, 0x4040 +; GFX942-NEXT: scratch_store_dword off, v0, s1 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[2:3] +; GFX942-NEXT: s_addk_i32 s32, 0x4080 ; GFX942-NEXT: s_add_i32 s1, s33, 64 -; GFX942-NEXT: s_mov_b32 s59, s1 +; GFX942-NEXT: v_writelane_b32 v0, s55, 0 +; GFX942-NEXT: s_mov_b32 s55, s1 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; use s59 +; GFX942-NEXT: ; use s55 ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s55, v0, 0 ; GFX942-NEXT: s_mov_b32 s32, s33 +; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 +; GFX942-NEXT: s_add_i32 s1, s33, 0x4040 +; GFX942-NEXT: scratch_load_dword v0, off, s1 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[2:3] ; GFX942-NEXT: s_mov_b32 s33, s0 +; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) - call void asm sideeffect "; use $0", "{s59}"(ptr addrspace(5) %alloca0) + call void asm sideeffect "; use $0", "{s55}"(ptr addrspace(5) %alloca0) ret void } @@ -864,48 +1414,83 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5 -; GFX10_1-NEXT: s_add_i32 s59, s4, 0x442c +; GFX10_1-NEXT: s_add_i32 s55, s4, 0x442c ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: ;;#ASMSTART -; GFX10_1-NEXT: ; use s59, scc +; GFX10_1-NEXT: ; use s55, scc ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5 -; GFX10_3-NEXT: s_add_i32 s59, s4, 0x442c +; GFX10_3-NEXT: s_add_i32 s55, s4, 0x442c ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: ;;#ASMSTART -; GFX10_3-NEXT: ; use s59, scc +; GFX10_3-NEXT: ; use s55, scc ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 +; GFX11-NEXT: scratch_store_b32 off, v1, s1 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: v_writelane_b32 v1, s55, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 -; GFX11-NEXT: s_add_i32 s59, s32, 0x442c +; GFX11-NEXT: s_add_i32 s55, s32, 0x442c ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use alloca0 v0 ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use s59, scc +; GFX11-NEXT: ; use s55, scc ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s55, v1, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 +; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: @@ -915,23 +1500,38 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_add_co_i32 s59, s32, 0x43ec +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: v_writelane_b32 v1, s55, 0 +; GFX12-NEXT: s_add_co_i32 s55, s32, 0x43ec ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: ;;#ASMSTART -; GFX12-NEXT: ; use s59, scc +; GFX12-NEXT: ; use s55, scc ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s55, v1, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_lshr_b32 s4, s32, 6 -; GFX8-NEXT: s_add_i32 s59, s4, 0x442c +; GFX8-NEXT: v_writelane_b32 v1, s55, 0 +; GFX8-NEXT: s_add_i32 s55, s4, 0x442c ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: ;;#ASMSTART @@ -939,15 +1539,26 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec ; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ; use s59, scc +; GFX8-NEXT: ; use s55, scc ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s55, v1, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_lshr_b32 s4, s32, 6 -; GFX900-NEXT: s_add_i32 s59, s4, 0x442c +; GFX900-NEXT: v_writelane_b32 v1, s55, 0 +; GFX900-NEXT: s_add_i32 s55, s4, 0x442c ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART @@ -955,14 +1566,25 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s59, scc +; GFX900-NEXT: ; use s55, scc ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s55, v1, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX942-NEXT: s_add_i32 s59, s32, 0x442c +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX942-NEXT: scratch_store_dword off, v1, s2 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: v_writelane_b32 v1, s55, 0 +; GFX942-NEXT: s_add_i32 s55, s32, 0x442c ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: ;;#ASMSTART @@ -970,14 +1592,20 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_immoffset( ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; use s59, scc +; GFX942-NEXT: ; use s55, scc ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s55, v1, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca [4096 x i32], align 4, addrspace(5) %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 251 call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) - call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0) + call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0) ret void } @@ -985,54 +1613,89 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX10_1-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX10_1: ; %bb.0: ; GFX10_1-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_1-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_lshl_b32 s4, s16, 2 -; GFX10_1-NEXT: s_lshr_b32 s59, s32, 5 -; GFX10_1-NEXT: s_add_i32 s59, s59, s4 +; GFX10_1-NEXT: s_lshr_b32 s55, s32, 5 +; GFX10_1-NEXT: s_add_i32 s55, s55, s4 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 -; GFX10_1-NEXT: s_addk_i32 s59, 0x4040 +; GFX10_1-NEXT: s_addk_i32 s55, 0x4040 ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; use alloca0 v0 ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_1-NEXT: ;;#ASMSTART -; GFX10_1-NEXT: ; use s59, scc +; GFX10_1-NEXT: ; use s55, scc ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: v_readlane_b32 s55, v1, 0 +; GFX10_1-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_1-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_1-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_1-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10_1-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_1-NEXT: s_waitcnt vmcnt(0) ; GFX10_1-NEXT: s_setpc_b64 s[30:31] ; ; GFX10_3-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX10_3: ; %bb.0: ; GFX10_3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_3-NEXT: buffer_store_dword v1, off, s[0:3], s5 ; 4-byte Folded Spill +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: v_writelane_b32 v1, s55, 0 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_lshl_b32 s4, s16, 2 -; GFX10_3-NEXT: s_lshr_b32 s59, s32, 5 -; GFX10_3-NEXT: s_add_i32 s59, s59, s4 +; GFX10_3-NEXT: s_lshr_b32 s55, s32, 5 +; GFX10_3-NEXT: s_add_i32 s55, s55, s4 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 -; GFX10_3-NEXT: s_addk_i32 s59, 0x4040 +; GFX10_3-NEXT: s_addk_i32 s55, 0x4040 ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; use alloca0 v0 ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo ; GFX10_3-NEXT: ;;#ASMSTART -; GFX10_3-NEXT: ; use s59, scc +; GFX10_3-NEXT: ; use s55, scc ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: v_readlane_b32 s55, v1, 0 +; GFX10_3-NEXT: s_xor_saveexec_b32 s4, -1 +; GFX10_3-NEXT: s_add_i32 s5, s32, 0x100800 +; GFX10_3-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload +; GFX10_3-NEXT: s_mov_b32 exec_lo, s4 +; GFX10_3-NEXT: s_waitcnt vmcnt(0) ; GFX10_3-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_xor_saveexec_b32 s1, -1 +; GFX11-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX11-NEXT: scratch_store_b32 off, v1, s2 ; 4-byte Folded Spill +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_add_i32 s1, s32, 64 +; GFX11-NEXT: v_writelane_b32 v1, s55, 0 ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: v_mov_b32_e32 v0, s1 -; GFX11-NEXT: s_add_i32 s59, s32, s0 +; GFX11-NEXT: s_add_i32 s55, s32, s0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; use alloca0 v0 ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_addk_i32 s59, 0x4040 +; GFX11-NEXT: s_addk_i32 s55, 0x4040 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use s59, scc +; GFX11-NEXT: ; use s55, scc ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: v_readlane_b32 s55, v1, 0 +; GFX11-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX11-NEXT: s_add_i32 s1, s32, 0x8040 +; GFX11-NEXT: scratch_load_b32 v1, off, s1 ; 4-byte Folded Reload +; GFX11-NEXT: s_mov_b32 exec_lo, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: @@ -1042,29 +1705,44 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_xor_saveexec_b32 s1, -1 +; GFX12-NEXT: scratch_store_b32 off, v1, s32 offset:32768 ; 4-byte Folded Spill +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: v_writelane_b32 v1, s55, 0 ; GFX12-NEXT: s_lshl_b32 s0, s0, 2 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_add_co_i32 s59, s32, s0 +; GFX12-NEXT: s_add_co_i32 s55, s32, s0 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; use alloca0 v0 ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_addk_co_i32 s59, 0x4000 +; GFX12-NEXT: s_addk_co_i32 s55, 0x4000 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART -; GFX12-NEXT: ; use s59, scc +; GFX12-NEXT: ; use s55, scc ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: v_readlane_b32 s55, v1, 0 +; GFX12-NEXT: s_xor_saveexec_b32 s0, -1 +; GFX12-NEXT: scratch_load_b32 v1, off, s32 offset:32768 ; 4-byte Folded Reload ; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 exec_lo, s0 +; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: v_writelane_b32 v1, s55, 0 ; GFX8-NEXT: s_lshl_b32 s4, s16, 2 -; GFX8-NEXT: s_lshr_b32 s59, s32, 6 -; GFX8-NEXT: s_add_i32 s59, s59, s4 -; GFX8-NEXT: s_addk_i32 s59, 0x4040 +; GFX8-NEXT: s_lshr_b32 s55, s32, 6 +; GFX8-NEXT: s_add_i32 s55, s55, s4 +; GFX8-NEXT: s_addk_i32 s55, 0x4040 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 ; GFX8-NEXT: ;;#ASMSTART @@ -1072,17 +1750,28 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec ; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ; use s59, scc +; GFX8-NEXT: ; use s55, scc ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: v_readlane_b32 s55, v1, 0 +; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_mov_b64 exec, s[4:5] +; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX900: ; %bb.0: ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX900-NEXT: buffer_store_dword v1, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: v_writelane_b32 v1, s55, 0 ; GFX900-NEXT: s_lshl_b32 s4, s16, 2 -; GFX900-NEXT: s_lshr_b32 s59, s32, 6 -; GFX900-NEXT: s_add_i32 s59, s59, s4 -; GFX900-NEXT: s_addk_i32 s59, 0x4040 +; GFX900-NEXT: s_lshr_b32 s55, s32, 6 +; GFX900-NEXT: s_add_i32 s55, s55, s4 +; GFX900-NEXT: s_addk_i32 s55, 0x4040 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 ; GFX900-NEXT: ;;#ASMSTART @@ -1090,16 +1779,27 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s59, scc +; GFX900-NEXT: ; use s55, scc ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: v_readlane_b32 s55, v1, 0 +; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 +; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX900-NEXT: buffer_load_dword v1, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[4:5] +; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offset: ; GFX942: ; %bb.0: ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_xor_saveexec_b64 s[2:3], -1 +; GFX942-NEXT: s_add_i32 s1, s32, 0x8040 +; GFX942-NEXT: scratch_store_dword off, v1, s1 ; 4-byte Folded Spill +; GFX942-NEXT: s_mov_b64 exec, s[2:3] ; GFX942-NEXT: s_lshl_b32 s0, s0, 2 -; GFX942-NEXT: s_add_i32 s59, s32, s0 -; GFX942-NEXT: s_addk_i32 s59, 0x4040 +; GFX942-NEXT: v_writelane_b32 v1, s55, 0 +; GFX942-NEXT: s_add_i32 s55, s32, s0 +; GFX942-NEXT: s_addk_i32 s55, 0x4040 ; GFX942-NEXT: s_add_i32 s0, s32, 64 ; GFX942-NEXT: v_mov_b32_e32 v0, s0 ; GFX942-NEXT: ;;#ASMSTART @@ -1107,14 +1807,20 @@ define void @scalar_mov_materializes_frame_index_unavailable_scc__gep_sgpr_offse ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: s_and_b64 s[0:1], 0, exec ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; use s59, scc +; GFX942-NEXT: ; use s55, scc ; GFX942-NEXT: ;;#ASMEND +; GFX942-NEXT: v_readlane_b32 s55, v1, 0 +; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1 +; GFX942-NEXT: s_add_i32 s2, s32, 0x8040 +; GFX942-NEXT: scratch_load_dword v1, off, s2 ; 4-byte Folded Reload +; GFX942-NEXT: s_mov_b64 exec, s[0:1] +; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: s_setpc_b64 s[30:31] %alloca0 = alloca [4096 x i32], align 64, addrspace(5) %alloca1 = alloca [4096 x i32], align 4, addrspace(5) %alloca1.offset = getelementptr [4096 x i32], ptr addrspace(5) %alloca1, i32 0, i32 %soffset call void asm sideeffect "; use alloca0 $0", "v"(ptr addrspace(5) %alloca0) - call void asm sideeffect "; use $0, $1", "{s59},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0) + call void asm sideeffect "; use $0, $1", "{s55},{scc}"(ptr addrspace(5) %alloca1.offset, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll index e8dacc93a8f3c..17581bcb61e99 100644 --- a/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll +++ b/llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll @@ -67,11 +67,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX7-NEXT: v_mov_b32_e32 v0, 0x4040 ; GFX7-NEXT: v_mad_u32_u24 v0, v0, 64, s32 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 6, v0 -; GFX7-NEXT: v_readfirstlane_b32 s59, v0 +; GFX7-NEXT: v_readfirstlane_b32 s54, v0 ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ;;#ASMSTART -; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX7-NEXT: ;;#ASMEND ; GFX7-NEXT: v_readlane_b32 s55, v23, 16 ; GFX7-NEXT: v_readlane_b32 s54, v23, 15 @@ -133,12 +133,13 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX8-NEXT: v_mov_b32_e32 v0, 0x4040 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, 64, s32 +; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 6, v0 -; GFX8-NEXT: v_readfirstlane_b32 s59, v0 +; GFX8-NEXT: v_readfirstlane_b32 s54, v0 ; GFX8-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_readlane_b32 s55, v23, 16 ; GFX8-NEXT: v_readlane_b32 s54, v23, 15 @@ -199,12 +200,13 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 +; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX900-NEXT: v_add_u32_e32 v0, 0x4040, v0 -; GFX900-NEXT: v_readfirstlane_b32 s59, v0 +; GFX900-NEXT: v_readfirstlane_b32 s54, v0 ; GFX900-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; GFX900-NEXT: s_waitcnt vmcnt(0) ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s55, v23, 16 ; GFX900-NEXT: v_readlane_b32 s54, v23, 15 @@ -263,12 +265,13 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_addc_u32 s60, s32, 0x4040 -; GFX942-NEXT: s_bitcmp1_b32 s60, 0 -; GFX942-NEXT: s_bitset0_b32 s60, 0 -; GFX942-NEXT: s_mov_b32 s59, s60 +; GFX942-NEXT: s_addc_u32 s59, s32, 0x4040 +; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX942-NEXT: s_bitcmp1_b32 s59, 0 +; GFX942-NEXT: s_bitset0_b32 s59, 0 +; GFX942-NEXT: s_mov_b32 s54, s59 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s55, v23, 16 ; GFX942-NEXT: v_readlane_b32 s54, v23, 15 @@ -329,10 +332,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_lshrrev_b32_e64 v24, 5, s32 +; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX10_1-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24 -; GFX10_1-NEXT: v_readfirstlane_b32 s59, v24 +; GFX10_1-NEXT: v_readfirstlane_b32 s54, v24 ; GFX10_1-NEXT: ;;#ASMSTART -; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_readlane_b32 s55, v23, 16 ; GFX10_1-NEXT: v_readlane_b32 s54, v23, 15 @@ -393,10 +397,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_lshrrev_b32_e64 v24, 5, s32 +; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX10_3-NEXT: v_add_nc_u32_e32 v24, 0x4040, v24 -; GFX10_3-NEXT: v_readfirstlane_b32 s59, v24 +; GFX10_3-NEXT: v_readfirstlane_b32 s54, v24 ; GFX10_3-NEXT: ;;#ASMSTART -; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_readlane_b32 s55, v23, 16 ; GFX10_3-NEXT: v_readlane_b32 s54, v23, 15 @@ -456,13 +461,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_addc_u32 s60, s32, 0x4040 +; GFX11-NEXT: s_addc_u32 s59, s32, 0x4040 +; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_bitcmp1_b32 s60, 0 -; GFX11-NEXT: s_bitset0_b32 s60, 0 -; GFX11-NEXT: s_mov_b32 s59, s60 +; GFX11-NEXT: s_bitcmp1_b32 s59, 0 +; GFX11-NEXT: s_bitset0_b32 s59, 0 +; GFX11-NEXT: s_mov_b32 s54, s59 ; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_readlane_b32 s55, v23, 16 ; GFX11-NEXT: v_readlane_b32 s54, v23, 15 @@ -524,14 +530,15 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: s_add_co_ci_u32 s60, s32, 0x4000 +; GFX12-NEXT: s_add_co_ci_u32 s59, s32, 0x4000 +; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_bitcmp1_b32 s60, 0 -; GFX12-NEXT: s_bitset0_b32 s60, 0 +; GFX12-NEXT: s_bitcmp1_b32 s59, 0 +; GFX12-NEXT: s_bitset0_b32 s59, 0 ; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_mov_b32 s59, s60 +; GFX12-NEXT: s_mov_b32 s54, s59 ; GFX12-NEXT: ;;#ASMSTART -; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s59, scc +; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:22], vcc, s54, scc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: v_readlane_b32 s55, v23, 16 ; GFX12-NEXT: v_readlane_b32 s54, v23, 15 @@ -579,7 +586,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; scc is unavailable since it is live in call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10", - "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s59},{scc}"( + "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s54},{scc}"( <16 x i32> %s0, <16 x i32> %s1, <16 x i32> %s2, @@ -629,9 +636,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX7-NEXT: ;;#ASMEND ; GFX7-NEXT: v_mad_u32_u24 v22, 16, 64, s32 ; GFX7-NEXT: v_lshrrev_b32_e32 v22, 6, v22 -; GFX7-NEXT: v_readfirstlane_b32 s59, v22 +; GFX7-NEXT: v_readfirstlane_b32 s54, v22 ; GFX7-NEXT: ;;#ASMSTART -; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX7-NEXT: ;;#ASMEND ; GFX7-NEXT: v_readlane_b32 s55, v21, 16 ; GFX7-NEXT: v_readlane_b32 s54, v21, 15 @@ -686,10 +693,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_mad_u32_u24 v22, 16, 64, s32 +; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX8-NEXT: v_lshrrev_b32_e32 v22, 6, v22 -; GFX8-NEXT: v_readfirstlane_b32 s59, v22 +; GFX8-NEXT: v_readfirstlane_b32 s54, v22 ; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: v_readlane_b32 s55, v21, 16 ; GFX8-NEXT: v_readlane_b32 s54, v21, 15 @@ -744,10 +752,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_lshrrev_b32_e64 v22, 6, s32 +; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX900-NEXT: v_add_u32_e32 v22, 16, v22 -; GFX900-NEXT: v_readfirstlane_b32 s59, v22 +; GFX900-NEXT: v_readfirstlane_b32 s54, v22 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: v_readlane_b32 s55, v21, 16 ; GFX900-NEXT: v_readlane_b32 s54, v21, 15 @@ -801,12 +810,13 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_addc_u32 s60, s32, 16 -; GFX942-NEXT: s_bitcmp1_b32 s60, 0 -; GFX942-NEXT: s_bitset0_b32 s60, 0 -; GFX942-NEXT: s_mov_b32 s59, s60 +; GFX942-NEXT: s_addc_u32 s59, s32, 16 +; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX942-NEXT: s_bitcmp1_b32 s59, 0 +; GFX942-NEXT: s_bitset0_b32 s59, 0 +; GFX942-NEXT: s_mov_b32 s54, s59 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s55, v21, 16 ; GFX942-NEXT: v_readlane_b32 s54, v21, 15 @@ -862,10 +872,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_lshrrev_b32_e64 v22, 5, s32 +; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX10_1-NEXT: v_add_nc_u32_e32 v22, 16, v22 -; GFX10_1-NEXT: v_readfirstlane_b32 s59, v22 +; GFX10_1-NEXT: v_readfirstlane_b32 s54, v22 ; GFX10_1-NEXT: ;;#ASMSTART -; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_readlane_b32 s55, v21, 16 ; GFX10_1-NEXT: v_readlane_b32 s54, v21, 15 @@ -921,10 +932,11 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_lshrrev_b32_e64 v22, 5, s32 +; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX10_3-NEXT: v_add_nc_u32_e32 v22, 16, v22 -; GFX10_3-NEXT: v_readfirstlane_b32 s59, v22 +; GFX10_3-NEXT: v_readfirstlane_b32 s54, v22 ; GFX10_3-NEXT: ;;#ASMSTART -; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_readlane_b32 s55, v21, 16 ; GFX10_3-NEXT: v_readlane_b32 s54, v21, 15 @@ -978,13 +990,14 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_addc_u32 s60, s32, 16 +; GFX11-NEXT: s_addc_u32 s59, s32, 16 +; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_bitcmp1_b32 s60, 0 -; GFX11-NEXT: s_bitset0_b32 s60, 0 -; GFX11-NEXT: s_mov_b32 s59, s60 +; GFX11-NEXT: s_bitcmp1_b32 s59, 0 +; GFX11-NEXT: s_bitset0_b32 s59, 0 +; GFX11-NEXT: s_mov_b32 s54, s59 ; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: v_readlane_b32 s55, v21, 16 ; GFX11-NEXT: v_readlane_b32 s54, v21, 15 @@ -1042,9 +1055,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: s_mov_b32 s59, s32 +; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX12-NEXT: s_mov_b32 s54, s32 ; GFX12-NEXT: ;;#ASMSTART -; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s59, scc +; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], s58, v[0:15], v[16:20], vcc, s54, scc ; GFX12-NEXT: ;;#ASMEND ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_readlane_b32 s55, v21, 16 @@ -1091,7 +1105,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs__lowe ; scc is unavailable since it is live in call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10", - "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:20]},{vcc},{s59},{scc}"( + "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:20]},{vcc},{s54},{scc}"( <16 x i32> %s0, <16 x i32> %s1, <16 x i32> %s2, @@ -1151,9 +1165,9 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX7-NEXT: ;;#ASMSTART ; GFX7-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX7-NEXT: ;;#ASMEND -; GFX7-NEXT: v_readlane_b32 s59, v22, 0 +; GFX7-NEXT: v_readlane_b32 s54, v22, 0 ; GFX7-NEXT: ;;#ASMSTART -; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX7-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX7-NEXT: ;;#ASMEND ; GFX7-NEXT: v_readlane_b32 s55, v23, 16 ; GFX7-NEXT: v_readlane_b32 s54, v23, 15 @@ -1188,58 +1202,66 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX8-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX8-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX8-NEXT: s_mov_b64 exec, s[4:5] -; GFX8-NEXT: v_writelane_b32 v22, s30, 0 -; GFX8-NEXT: v_writelane_b32 v22, s31, 1 -; GFX8-NEXT: v_writelane_b32 v22, s33, 2 -; GFX8-NEXT: v_writelane_b32 v22, s34, 3 -; GFX8-NEXT: v_writelane_b32 v22, s35, 4 -; GFX8-NEXT: v_writelane_b32 v22, s36, 5 -; GFX8-NEXT: v_writelane_b32 v22, s37, 6 -; GFX8-NEXT: v_writelane_b32 v22, s38, 7 -; GFX8-NEXT: v_writelane_b32 v22, s39, 8 -; GFX8-NEXT: v_writelane_b32 v22, s48, 9 -; GFX8-NEXT: v_writelane_b32 v22, s49, 10 -; GFX8-NEXT: v_writelane_b32 v22, s50, 11 -; GFX8-NEXT: v_writelane_b32 v22, s51, 12 -; GFX8-NEXT: v_writelane_b32 v22, s52, 13 -; GFX8-NEXT: s_lshr_b32 s4, s32, 6 -; GFX8-NEXT: v_writelane_b32 v22, s53, 14 +; GFX8-NEXT: v_writelane_b32 v23, s30, 0 +; GFX8-NEXT: v_writelane_b32 v23, s31, 1 +; GFX8-NEXT: v_writelane_b32 v23, s33, 2 +; GFX8-NEXT: v_writelane_b32 v23, s34, 3 +; GFX8-NEXT: v_writelane_b32 v23, s35, 4 +; GFX8-NEXT: v_writelane_b32 v23, s36, 5 +; GFX8-NEXT: v_writelane_b32 v23, s37, 6 +; GFX8-NEXT: v_writelane_b32 v23, s38, 7 +; GFX8-NEXT: v_writelane_b32 v23, s39, 8 +; GFX8-NEXT: v_writelane_b32 v23, s48, 9 +; GFX8-NEXT: v_writelane_b32 v23, s49, 10 +; GFX8-NEXT: v_writelane_b32 v23, s50, 11 +; GFX8-NEXT: v_writelane_b32 v23, s51, 12 +; GFX8-NEXT: v_writelane_b32 v23, s52, 13 +; GFX8-NEXT: s_lshr_b32 s5, s32, 6 +; GFX8-NEXT: v_writelane_b32 v23, s53, 14 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX8-NEXT: s_add_i32 s59, s4, 0x4240 -; GFX8-NEXT: v_writelane_b32 v22, s54, 15 +; GFX8-NEXT: s_add_i32 s4, s5, 0x4240 +; GFX8-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane +; GFX8-NEXT: v_writelane_b32 v23, s54, 15 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 64, v0 +; GFX8-NEXT: v_writelane_b32 v22, s4, 0 ; GFX8-NEXT: s_and_b64 s[4:5], 0, exec -; GFX8-NEXT: v_writelane_b32 v22, s55, 16 +; GFX8-NEXT: v_writelane_b32 v23, s55, 16 ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; use alloca0 v0 ; GFX8-NEXT: ;;#ASMEND ; GFX8-NEXT: ;;#ASMSTART ; GFX8-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX8-NEXT: ;;#ASMEND +; GFX8-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX8-NEXT: v_readlane_b32 s54, v22, 0 ; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX8-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: v_readlane_b32 s55, v22, 16 -; GFX8-NEXT: v_readlane_b32 s54, v22, 15 -; GFX8-NEXT: v_readlane_b32 s53, v22, 14 -; GFX8-NEXT: v_readlane_b32 s52, v22, 13 -; GFX8-NEXT: v_readlane_b32 s51, v22, 12 -; GFX8-NEXT: v_readlane_b32 s50, v22, 11 -; GFX8-NEXT: v_readlane_b32 s49, v22, 10 -; GFX8-NEXT: v_readlane_b32 s48, v22, 9 -; GFX8-NEXT: v_readlane_b32 s39, v22, 8 -; GFX8-NEXT: v_readlane_b32 s38, v22, 7 -; GFX8-NEXT: v_readlane_b32 s37, v22, 6 -; GFX8-NEXT: v_readlane_b32 s36, v22, 5 -; GFX8-NEXT: v_readlane_b32 s35, v22, 4 -; GFX8-NEXT: v_readlane_b32 s34, v22, 3 -; GFX8-NEXT: v_readlane_b32 s33, v22, 2 -; GFX8-NEXT: v_readlane_b32 s31, v22, 1 -; GFX8-NEXT: v_readlane_b32 s30, v22, 0 +; GFX8-NEXT: v_readlane_b32 s55, v23, 16 +; GFX8-NEXT: v_readlane_b32 s54, v23, 15 +; GFX8-NEXT: v_readlane_b32 s53, v23, 14 +; GFX8-NEXT: v_readlane_b32 s52, v23, 13 +; GFX8-NEXT: v_readlane_b32 s51, v23, 12 +; GFX8-NEXT: v_readlane_b32 s50, v23, 11 +; GFX8-NEXT: v_readlane_b32 s49, v23, 10 +; GFX8-NEXT: v_readlane_b32 s48, v23, 9 +; GFX8-NEXT: v_readlane_b32 s39, v23, 8 +; GFX8-NEXT: v_readlane_b32 s38, v23, 7 +; GFX8-NEXT: v_readlane_b32 s37, v23, 6 +; GFX8-NEXT: v_readlane_b32 s36, v23, 5 +; GFX8-NEXT: v_readlane_b32 s35, v23, 4 +; GFX8-NEXT: v_readlane_b32 s34, v23, 3 +; GFX8-NEXT: v_readlane_b32 s33, v23, 2 +; GFX8-NEXT: v_readlane_b32 s31, v23, 1 +; GFX8-NEXT: v_readlane_b32 s30, v23, 0 ; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX8-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX8-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX8-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX8-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX8-NEXT: s_mov_b64 exec, s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) @@ -1250,58 +1272,66 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX900-NEXT: buffer_store_dword v23, off, s[0:3], s6 ; 4-byte Folded Spill +; GFX900-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX900-NEXT: buffer_store_dword v22, off, s[0:3], s6 ; 4-byte Folded Spill ; GFX900-NEXT: s_mov_b64 exec, s[4:5] -; GFX900-NEXT: v_writelane_b32 v22, s30, 0 -; GFX900-NEXT: v_writelane_b32 v22, s31, 1 -; GFX900-NEXT: v_writelane_b32 v22, s33, 2 -; GFX900-NEXT: v_writelane_b32 v22, s34, 3 -; GFX900-NEXT: v_writelane_b32 v22, s35, 4 -; GFX900-NEXT: v_writelane_b32 v22, s36, 5 -; GFX900-NEXT: v_writelane_b32 v22, s37, 6 -; GFX900-NEXT: v_writelane_b32 v22, s38, 7 -; GFX900-NEXT: v_writelane_b32 v22, s39, 8 -; GFX900-NEXT: v_writelane_b32 v22, s48, 9 -; GFX900-NEXT: v_writelane_b32 v22, s49, 10 -; GFX900-NEXT: v_writelane_b32 v22, s50, 11 -; GFX900-NEXT: v_writelane_b32 v22, s51, 12 -; GFX900-NEXT: v_writelane_b32 v22, s52, 13 -; GFX900-NEXT: s_lshr_b32 s4, s32, 6 -; GFX900-NEXT: v_writelane_b32 v22, s53, 14 +; GFX900-NEXT: v_writelane_b32 v23, s30, 0 +; GFX900-NEXT: v_writelane_b32 v23, s31, 1 +; GFX900-NEXT: v_writelane_b32 v23, s33, 2 +; GFX900-NEXT: v_writelane_b32 v23, s34, 3 +; GFX900-NEXT: v_writelane_b32 v23, s35, 4 +; GFX900-NEXT: v_writelane_b32 v23, s36, 5 +; GFX900-NEXT: v_writelane_b32 v23, s37, 6 +; GFX900-NEXT: v_writelane_b32 v23, s38, 7 +; GFX900-NEXT: v_writelane_b32 v23, s39, 8 +; GFX900-NEXT: v_writelane_b32 v23, s48, 9 +; GFX900-NEXT: v_writelane_b32 v23, s49, 10 +; GFX900-NEXT: v_writelane_b32 v23, s50, 11 +; GFX900-NEXT: v_writelane_b32 v23, s51, 12 +; GFX900-NEXT: v_writelane_b32 v23, s52, 13 +; GFX900-NEXT: s_lshr_b32 s5, s32, 6 +; GFX900-NEXT: v_writelane_b32 v23, s53, 14 ; GFX900-NEXT: v_lshrrev_b32_e64 v0, 6, s32 -; GFX900-NEXT: s_add_i32 s59, s4, 0x4240 -; GFX900-NEXT: v_writelane_b32 v22, s54, 15 +; GFX900-NEXT: s_add_i32 s4, s5, 0x4240 +; GFX900-NEXT: ; implicit-def: $vgpr22 : SGPR spill to VGPR lane +; GFX900-NEXT: v_writelane_b32 v23, s54, 15 ; GFX900-NEXT: v_add_u32_e32 v0, 64, v0 +; GFX900-NEXT: v_writelane_b32 v22, s4, 0 ; GFX900-NEXT: s_and_b64 s[4:5], 0, exec -; GFX900-NEXT: v_writelane_b32 v22, s55, 16 +; GFX900-NEXT: v_writelane_b32 v23, s55, 16 ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; use alloca0 v0 ; GFX900-NEXT: ;;#ASMEND ; GFX900-NEXT: ;;#ASMSTART ; GFX900-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX900-NEXT: v_readlane_b32 s54, v22, 0 ; GFX900-NEXT: ;;#ASMSTART -; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX900-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX900-NEXT: ;;#ASMEND -; GFX900-NEXT: v_readlane_b32 s55, v22, 16 -; GFX900-NEXT: v_readlane_b32 s54, v22, 15 -; GFX900-NEXT: v_readlane_b32 s53, v22, 14 -; GFX900-NEXT: v_readlane_b32 s52, v22, 13 -; GFX900-NEXT: v_readlane_b32 s51, v22, 12 -; GFX900-NEXT: v_readlane_b32 s50, v22, 11 -; GFX900-NEXT: v_readlane_b32 s49, v22, 10 -; GFX900-NEXT: v_readlane_b32 s48, v22, 9 -; GFX900-NEXT: v_readlane_b32 s39, v22, 8 -; GFX900-NEXT: v_readlane_b32 s38, v22, 7 -; GFX900-NEXT: v_readlane_b32 s37, v22, 6 -; GFX900-NEXT: v_readlane_b32 s36, v22, 5 -; GFX900-NEXT: v_readlane_b32 s35, v22, 4 -; GFX900-NEXT: v_readlane_b32 s34, v22, 3 -; GFX900-NEXT: v_readlane_b32 s33, v22, 2 -; GFX900-NEXT: v_readlane_b32 s31, v22, 1 -; GFX900-NEXT: v_readlane_b32 s30, v22, 0 +; GFX900-NEXT: v_readlane_b32 s55, v23, 16 +; GFX900-NEXT: v_readlane_b32 s54, v23, 15 +; GFX900-NEXT: v_readlane_b32 s53, v23, 14 +; GFX900-NEXT: v_readlane_b32 s52, v23, 13 +; GFX900-NEXT: v_readlane_b32 s51, v23, 12 +; GFX900-NEXT: v_readlane_b32 s50, v23, 11 +; GFX900-NEXT: v_readlane_b32 s49, v23, 10 +; GFX900-NEXT: v_readlane_b32 s48, v23, 9 +; GFX900-NEXT: v_readlane_b32 s39, v23, 8 +; GFX900-NEXT: v_readlane_b32 s38, v23, 7 +; GFX900-NEXT: v_readlane_b32 s37, v23, 6 +; GFX900-NEXT: v_readlane_b32 s36, v23, 5 +; GFX900-NEXT: v_readlane_b32 s35, v23, 4 +; GFX900-NEXT: v_readlane_b32 s34, v23, 3 +; GFX900-NEXT: v_readlane_b32 s33, v23, 2 +; GFX900-NEXT: v_readlane_b32 s31, v23, 1 +; GFX900-NEXT: v_readlane_b32 s30, v23, 0 ; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 ; GFX900-NEXT: s_add_i32 s6, s32, 0x201000 +; GFX900-NEXT: buffer_load_dword v23, off, s[0:3], s6 ; 4-byte Folded Reload +; GFX900-NEXT: s_add_i32 s6, s32, 0x201100 ; GFX900-NEXT: buffer_load_dword v22, off, s[0:3], s6 ; 4-byte Folded Reload ; GFX900-NEXT: s_mov_b64 exec, s[4:5] ; GFX900-NEXT: s_waitcnt vmcnt(0) @@ -1339,10 +1369,12 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX942-NEXT: ;;#ASMSTART ; GFX942-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX942-NEXT: ;;#ASMEND -; GFX942-NEXT: s_add_i32 s59, s32, 0x4240 +; GFX942-NEXT: s_add_i32 s58, s32, 0x4240 +; GFX942-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 ; GFX942-NEXT: s_and_b64 s[60:61], 0, exec +; GFX942-NEXT: s_mov_b32 s54, s58 ; GFX942-NEXT: ;;#ASMSTART -; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX942-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX942-NEXT: ;;#ASMEND ; GFX942-NEXT: v_readlane_b32 s55, v22, 16 ; GFX942-NEXT: v_readlane_b32 s54, v22, 15 @@ -1379,7 +1411,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_1-NEXT: v_writelane_b32 v22, s30, 0 ; GFX10_1-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_1-NEXT: s_lshr_b32 s4, s32, 5 -; GFX10_1-NEXT: s_add_i32 s59, s4, 0x4240 +; GFX10_1-NEXT: s_add_i32 s58, s4, 0x4240 ; GFX10_1-NEXT: v_writelane_b32 v22, s31, 1 ; GFX10_1-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_1-NEXT: s_and_b32 s4, 0, exec_lo @@ -1404,8 +1436,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_1-NEXT: ;;#ASMSTART ; GFX10_1-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX10_1-NEXT: ;;#ASMEND +; GFX10_1-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX10_1-NEXT: s_mov_b32 s54, s58 ; GFX10_1-NEXT: ;;#ASMSTART -; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX10_1-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX10_1-NEXT: ;;#ASMEND ; GFX10_1-NEXT: v_readlane_b32 s55, v22, 16 ; GFX10_1-NEXT: v_readlane_b32 s54, v22, 15 @@ -1442,7 +1476,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_3-NEXT: v_writelane_b32 v22, s30, 0 ; GFX10_3-NEXT: v_lshrrev_b32_e64 v0, 5, s32 ; GFX10_3-NEXT: s_lshr_b32 s4, s32, 5 -; GFX10_3-NEXT: s_add_i32 s59, s4, 0x4240 +; GFX10_3-NEXT: s_add_i32 s58, s4, 0x4240 ; GFX10_3-NEXT: v_writelane_b32 v22, s31, 1 ; GFX10_3-NEXT: v_add_nc_u32_e32 v0, 64, v0 ; GFX10_3-NEXT: s_and_b32 s4, 0, exec_lo @@ -1467,8 +1501,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX10_3-NEXT: ;;#ASMSTART ; GFX10_3-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX10_3-NEXT: ;;#ASMEND +; GFX10_3-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX10_3-NEXT: s_mov_b32 s54, s58 ; GFX10_3-NEXT: ;;#ASMSTART -; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX10_3-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX10_3-NEXT: ;;#ASMEND ; GFX10_3-NEXT: v_readlane_b32 s55, v22, 16 ; GFX10_3-NEXT: v_readlane_b32 s54, v22, 15 @@ -1503,7 +1539,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: v_writelane_b32 v22, s30, 0 ; GFX11-NEXT: s_add_i32 s0, s32, 64 -; GFX11-NEXT: s_add_i32 s59, s32, 0x4240 +; GFX11-NEXT: s_add_i32 s58, s32, 0x4240 ; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_and_b32 s0, 0, exec_lo ; GFX11-NEXT: v_writelane_b32 v22, s31, 1 @@ -1528,8 +1564,10 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX11-NEXT: ;;#ASMSTART ; GFX11-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX11-NEXT: ;;#ASMEND +; GFX11-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX11-NEXT: s_mov_b32 s54, s58 ; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX11-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX11-NEXT: ;;#ASMEND ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readlane_b32 s55, v22, 16 @@ -1568,7 +1606,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX12-NEXT: s_wait_alu 0xfffe ; GFX12-NEXT: s_mov_b32 exec_lo, s0 ; GFX12-NEXT: v_writelane_b32 v22, s30, 0 -; GFX12-NEXT: s_add_co_i32 s59, s32, 0x4200 +; GFX12-NEXT: s_add_co_i32 s58, s32, 0x4200 ; GFX12-NEXT: v_mov_b32_e32 v0, s32 ; GFX12-NEXT: s_and_b32 s0, 0, exec_lo ; GFX12-NEXT: ;;#ASMSTART @@ -1593,10 +1631,12 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; GFX12-NEXT: ;;#ASMSTART ; GFX12-NEXT: ; def s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc ; GFX12-NEXT: ;;#ASMEND +; GFX12-NEXT: ; kill: def $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 killed $sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55 def $sgpr54 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_mov_b32 s54, s58 ; GFX12-NEXT: ;;#ASMSTART -; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s59, scc +; GFX12-NEXT: ; use s[0:15], s[16:31], s[32:47], s[48:55], s[56:57], v[0:15], v[16:21], vcc, s54, scc ; GFX12-NEXT: ;;#ASMEND -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_readlane_b32 s55, v22, 16 ; GFX12-NEXT: v_readlane_b32 s54, v22, 15 ; GFX12-NEXT: v_readlane_b32 s53, v22, 14 @@ -1644,7 +1684,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs_gep_i ; scc is unavailable since it is live in call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9", - "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{v[0:15]},{v[16:21]},{vcc},{s59},{scc}"( + "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{v[0:15]},{v[16:21]},{vcc},{s54},{scc}"( <16 x i32> %s0, <16 x i32> %s1, <16 x i32> %s2, diff --git a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll index 79187f51af0d2..f70cd6816a966 100644 --- a/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg-crash.ll @@ -44,7 +44,7 @@ define void @scalar_mov_materializes_frame_index_no_live_scc_no_live_sgprs() #0 ; scc is unavailable since it is live in call void asm sideeffect "; use $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10", - "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s59},{scc}"( + "{s[0:15]},{s[16:31]},{s[32:47]},{s[48:55]},{s[56:57]},{s58},{v[0:15]},{v[16:22]},{vcc},{s64},{scc}"( <16 x i32> %s0, <16 x i32> %s1, <16 x i32> %s2,