diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 1b66f5d06ea26..792f4695d288b 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3250,9 +3250,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); // Multiplied part is the constant: Use v_madmk_{f16, f32}. - // We should only expect these to be on src0 due to canonicalization. - if (Src0->isReg() && Src0->getReg() == Reg) { - if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))) + if ((Src0->isReg() && Src0->getReg() == Reg) || + (Src1->isReg() && Src1->getReg() == Reg)) { + MachineOperand *RegSrc = + Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1; + if (!RegSrc->isReg() || + RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg()))) return false; if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg()))) @@ -3266,18 +3269,22 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (pseudoToMCOpcode(NewOpc) == -1) return false; - // We need to swap operands 0 and 1 since madmk constant is at operand 1. + // V_FMAMK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite + // would also require restricting their register classes. For now + // just bail out. + if (NewOpc == AMDGPU::V_FMAMK_F16_t16) + return false; const int64_t Imm = ImmOp->getImm(); // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. - Register Src1Reg = Src1->getReg(); - unsigned Src1SubReg = Src1->getSubReg(); - Src0->setReg(Src1Reg); - Src0->setSubReg(Src1SubReg); - Src0->setIsKill(Src1->isKill()); + Register SrcReg = RegSrc->getReg(); + unsigned SrcSubReg = RegSrc->getSubReg(); + Src0->setReg(SrcReg); + Src0->setSubReg(SrcSubReg); + Src0->setIsKill(RegSrc->isKill()); if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index 1b8216f4aa2a6..c793f9ee682f8 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -7149,7 +7149,7 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) { ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 -; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: s_movk_i32 s8, 0x11f @@ -7269,7 +7269,7 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) { ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v1, v1 -; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 @@ -7533,21 +7533,21 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, < ; GFX6-NEXT: v_madak_f32 v0, 0, v0, 0x457ff000 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 ; GFX6-NEXT: s_movk_i32 s6, 0xf001 -; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd ; GFX6-NEXT: s_movk_i32 s8, 0xfff +; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 -; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 12 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd ; GFX6-NEXT: v_mul_hi_u32 v2, v0, s6 ; GFX6-NEXT: v_mul_lo_u32 v4, v1, s6 ; GFX6-NEXT: v_mul_lo_u32 v3, v0, s6 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 12 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v0 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GFX6-NEXT: v_mul_hi_u32 v5, v0, v3 @@ -7647,7 +7647,7 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, < ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v1, v1 -; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -7834,7 +7834,7 @@ define amdgpu_kernel void @urem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) { ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 -; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -7954,7 +7954,7 @@ define amdgpu_kernel void @urem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) { ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v1, v1 -; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 @@ -8283,7 +8283,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) { ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 -; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -8399,7 +8399,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) { ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v1, v1 -; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 @@ -8589,14 +8589,14 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: s_sub_u32 s4, 0, s10 ; GFX6-NEXT: s_subb_u32 s5, 0, s11 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX6-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GFX6-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_ashr_i32 s12, s3, 31 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 -; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_add_u32 s2, s2, s12 @@ -8724,13 +8724,13 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_sub_u32 s0, 0, s8 ; GFX9-NEXT: s_subb_u32 s1, 0, s9 -; GFX9-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GFX9-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GFX9-NEXT: v_rcp_f32_e32 v1, v0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 ; GFX9-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 ; GFX9-NEXT: v_trunc_f32_e32 v2, v2 -; GFX9-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2 +; GFX9-NEXT: v_madmk_f32 v1, v2, 0xcf800000, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: v_readfirstlane_b32 s10, v2 @@ -8944,14 +8944,14 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, ; GFX6-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX6-NEXT: v_mac_f32_e32 v0, 0, v1 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 -; GFX6-NEXT: s_movk_i32 s6, 0xf001 ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd +; GFX6-NEXT: s_movk_i32 s6, 0xf001 ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 -; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) @@ -9073,7 +9073,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, ; GFX9-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 ; GFX9-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 ; GFX9-NEXT: v_trunc_f32_e32 v2, v2 -; GFX9-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2 +; GFX9-NEXT: v_madmk_f32 v1, v2, 0xcf800000, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -9789,7 +9789,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) { ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 -; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_mov_b32 s2, -1 @@ -9903,7 +9903,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) { ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v1, v1 -; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 @@ -10093,14 +10093,14 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: s_sub_u32 s4, 0, s8 ; GFX6-NEXT: s_subb_u32 s5, 0, s9 ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX6-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GFX6-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_ashr_i32 s10, s3, 31 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 -; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: s_add_u32 s2, s2, s10 @@ -10226,13 +10226,13 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 ; GFX9-NEXT: s_sub_u32 s0, 0, s8 ; GFX9-NEXT: s_subb_u32 s1, 0, s9 -; GFX9-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GFX9-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GFX9-NEXT: v_rcp_f32_e32 v1, v0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 ; GFX9-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 ; GFX9-NEXT: v_trunc_f32_e32 v2, v2 -; GFX9-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2 +; GFX9-NEXT: v_madmk_f32 v1, v2, 0xcf800000, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: v_readfirstlane_b32 s2, v2 diff --git a/llvm/test/CodeGen/AMDGPU/bypass-div.ll b/llvm/test/CodeGen/AMDGPU/bypass-div.ll index 2184478635e0e..cb1b664549c9a 100644 --- a/llvm/test/CodeGen/AMDGPU/bypass-div.ll +++ b/llvm/test/CodeGen/AMDGPU/bypass-div.ll @@ -25,12 +25,12 @@ define i64 @sdiv64(i64 %a, i64 %b) { ; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v10 ; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v11 ; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v10, vcc -; GFX9-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 ; GFX9-NEXT: v_rcp_f32_e32 v2, v2 ; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GFX9-NEXT: v_trunc_f32_e32 v3, v3 -; GFX9-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2 ; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v3 ; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6 @@ -171,12 +171,12 @@ define i64 @udiv64(i64 %a, i64 %b) { ; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3 ; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2 ; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc -; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GFX9-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4 ; GFX9-NEXT: v_rcp_f32_e32 v4, v4 ; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; GFX9-NEXT: v_trunc_f32_e32 v5, v5 -; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; GFX9-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4 ; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5 ; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4 ; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8 @@ -312,12 +312,12 @@ define i64 @srem64(i64 %a, i64 %b) { ; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v9 ; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v10 ; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v9, vcc -; GFX9-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 ; GFX9-NEXT: v_rcp_f32_e32 v2, v2 ; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GFX9-NEXT: v_trunc_f32_e32 v3, v3 -; GFX9-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2 ; GFX9-NEXT: v_cvt_u32_f32_e32 v11, v3 ; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6 @@ -454,12 +454,12 @@ define i64 @urem64(i64 %a, i64 %b) { ; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3 ; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2 ; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc -; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GFX9-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4 ; GFX9-NEXT: v_rcp_f32_e32 v4, v4 ; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; GFX9-NEXT: v_trunc_f32_e32 v5, v5 -; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; GFX9-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4 ; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5 ; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4 ; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8 @@ -709,118 +709,118 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) { ; GFX9-NEXT: s_xor_b64 s[10:11], exec, s[4:5] ; GFX9-NEXT: s_cbranch_execz .LBB8_2 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: v_ashrrev_i32_e32 v11, 31, v3 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v2, v11 -; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v3, v11, vcc -; GFX9-NEXT: v_xor_b32_e32 v2, v2, v11 -; GFX9-NEXT: v_xor_b32_e32 v3, v4, v11 -; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v3 -; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v2 -; GFX9-NEXT: v_sub_co_u32_e32 v9, vcc, 0, v3 -; GFX9-NEXT: v_subb_co_u32_e32 v10, vcc, 0, v2, vcc -; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; GFX9-NEXT: v_rcp_f32_e32 v4, v4 -; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 -; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 -; GFX9-NEXT: v_trunc_f32_e32 v5, v5 -; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 -; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v4 -; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v5 -; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8 -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v9, v8, 0 -; GFX9-NEXT: v_mul_lo_u32 v7, v9, v12 -; GFX9-NEXT: v_mul_hi_u32 v13, v8, v4 -; GFX9-NEXT: v_add3_u32 v7, v5, v7, v6 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v7, 0 -; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v13, v5 -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v4, 0 -; GFX9-NEXT: v_addc_co_u32_e32 v14, vcc, 0, v6, vcc -; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v7, 0 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v13, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v14, v5, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6 -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v8, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v12, v5, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v9, v12 -; GFX9-NEXT: v_mul_lo_u32 v7, v10, v13 -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v9, v13, 0 -; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v7, 0 -; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v13, v7, 0 -; GFX9-NEXT: v_mul_hi_u32 v14, v13, v4 -; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v4, 0 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v14, v7 -; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v8, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v10, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v5 +; GFX9-NEXT: v_ashrrev_i32_e32 v9, 31, v3 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v9 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v9, vcc +; GFX9-NEXT: v_xor_b32_e32 v10, v3, v9 +; GFX9-NEXT: v_xor_b32_e32 v11, v2, v9 +; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v11 +; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v10 +; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v11 +; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v10, vcc +; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 +; GFX9-NEXT: v_rcp_f32_e32 v2, v2 +; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 +; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 +; GFX9-NEXT: v_trunc_f32_e32 v3, v3 +; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 +; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2 +; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v3 +; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0 +; GFX9-NEXT: v_mul_lo_u32 v5, v7, v12 +; GFX9-NEXT: v_mul_hi_u32 v13, v6, v2 +; GFX9-NEXT: v_add3_u32 v5, v3, v5, v4 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 +; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v13, v3 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v2, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v14, vcc, 0, v4, vcc +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v5, 0 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v14, v3, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v6, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v12, v3, vcc +; GFX9-NEXT: v_mul_lo_u32 v4, v7, v12 +; GFX9-NEXT: v_mul_lo_u32 v5, v8, v13 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v13, 0 +; GFX9-NEXT: v_add3_u32 v5, v3, v4, v5 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v5, 0 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v5, 0 +; GFX9-NEXT: v_mul_hi_u32 v14, v13, v2 +; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v2, 0 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v14, v5 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v13, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v12, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 +; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v5, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v12, v3, vcc ; GFX9-NEXT: v_ashrrev_i32_e32 v7, 31, v1 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v7 -; GFX9-NEXT: v_xor_b32_e32 v8, v0, v7 -; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v1, v7, vcc -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v5, 0 -; GFX9-NEXT: v_mul_hi_u32 v9, v8, v4 -; GFX9-NEXT: v_xor_b32_e32 v6, v6, v7 -; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v9, v0 -; GFX9-NEXT: v_addc_co_u32_e32 v10, vcc, 0, v1, vcc -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v4, 0 -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v5, 0 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v9, v0 -; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v10, v1, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v0, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc -; GFX9-NEXT: v_mul_lo_u32 v9, v2, v4 -; GFX9-NEXT: v_mul_lo_u32 v10, v3, v5 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v4, 0 -; GFX9-NEXT: v_add3_u32 v1, v1, v10, v9 -; GFX9-NEXT: v_sub_u32_e32 v9, v6, v1 -; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v8, v0 -; GFX9-NEXT: v_subb_co_u32_e64 v8, s[4:5], v9, v2, vcc -; GFX9-NEXT: v_sub_co_u32_e64 v9, s[4:5], v0, v3 -; GFX9-NEXT: v_subbrev_co_u32_e64 v10, s[6:7], 0, v8, s[4:5] -; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[6:7] -; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3 +; GFX9-NEXT: v_xor_b32_e32 v5, v0, v7 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v7, vcc +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v3, 0 +; GFX9-NEXT: v_mul_hi_u32 v6, v5, v2 +; GFX9-NEXT: v_xor_b32_e32 v4, v4, v7 +; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0 +; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v8, v1, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc +; GFX9-NEXT: v_mul_lo_u32 v6, v10, v2 +; GFX9-NEXT: v_mul_lo_u32 v8, v11, v3 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v2, 0 +; GFX9-NEXT: v_add3_u32 v1, v1, v8, v6 +; GFX9-NEXT: v_sub_u32_e32 v6, v4, v1 +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v5, v0 +; GFX9-NEXT: v_subb_co_u32_e64 v6, s[4:5], v6, v10, vcc +; GFX9-NEXT: v_sub_co_u32_e64 v8, s[4:5], v0, v11 +; GFX9-NEXT: v_subbrev_co_u32_e64 v12, s[6:7], 0, v6, s[4:5] +; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v10 +; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] +; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v8, v11 ; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v10, v2 -; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[6:7] -; GFX9-NEXT: v_add_co_u32_e64 v13, s[6:7], 2, v4 -; GFX9-NEXT: v_addc_co_u32_e64 v14, s[6:7], 0, v5, s[6:7] -; GFX9-NEXT: v_add_co_u32_e64 v15, s[6:7], 1, v4 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v6, v1, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v16, s[6:7], 0, v5, s[6:7] -; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v12 -; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v12, v16, v14, s[6:7] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v12, v10 +; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7] +; GFX9-NEXT: v_add_co_u32_e64 v13, s[6:7], 2, v2 +; GFX9-NEXT: v_addc_co_u32_e64 v14, s[6:7], 0, v3, s[6:7] +; GFX9-NEXT: v_add_co_u32_e64 v15, s[6:7], 1, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v4, v1, vcc +; GFX9-NEXT: v_addc_co_u32_e64 v16, s[6:7], 0, v3, s[6:7] +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 +; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v5 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v11 +; GFX9-NEXT: v_cndmask_b32_e64 v5, v16, v14, s[6:7] ; GFX9-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v8, v2, s[4:5] -; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v9, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v10 +; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GFX9-NEXT: v_cndmask_b32_e64 v4, v15, v13, s[6:7] +; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX9-NEXT: v_xor_b32_e32 v5, v7, v9 +; GFX9-NEXT: v_xor_b32_e32 v2, v2, v5 +; GFX9-NEXT: v_xor_b32_e32 v3, v3, v5 +; GFX9-NEXT: v_sub_co_u32_e64 v4, s[8:9], v2, v5 +; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v6, v10, s[4:5] +; GFX9-NEXT: v_subb_co_u32_e64 v5, s[8:9], v3, v5, s[8:9] +; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v8, v11 ; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[4:5], 0, v2, s[4:5] -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v6, v15, v13, s[6:7] -; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[6:7] -; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_xor_b32_e32 v6, v7, v11 +; GFX9-NEXT: v_cndmask_b32_e64 v2, v12, v2, s[6:7] ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, v3, s[6:7] -; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc -; GFX9-NEXT: v_xor_b32_e32 v4, v4, v6 +; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v3, s[6:7] ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX9-NEXT: v_xor_b32_e32 v5, v5, v6 -; GFX9-NEXT: v_sub_co_u32_e64 v4, s[8:9], v4, v6 ; GFX9-NEXT: v_xor_b32_e32 v0, v0, v7 -; GFX9-NEXT: v_subb_co_u32_e64 v5, s[8:9], v5, v6, s[8:9] ; GFX9-NEXT: v_xor_b32_e32 v1, v1, v7 ; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, v0, v7 ; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v1, v7, vcc @@ -884,12 +884,12 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) { ; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3 ; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2 ; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc -; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GFX9-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4 ; GFX9-NEXT: v_rcp_f32_e32 v4, v4 ; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; GFX9-NEXT: v_trunc_f32_e32 v5, v5 -; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; GFX9-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4 ; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5 ; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4 ; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8 diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll index a9a6075516f6c..50693a92bc92c 100644 --- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll +++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll @@ -1804,12 +1804,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; CISI-NEXT: v_cvt_f32_u32_e32 v1, s3 ; CISI-NEXT: s_sub_u32 s0, 0, s2 ; CISI-NEXT: s_subb_u32 s1, 0, s3 -; CISI-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; CISI-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; CISI-NEXT: v_rcp_f32_e32 v0, v0 ; CISI-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; CISI-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; CISI-NEXT: v_trunc_f32_e32 v1, v1 -; CISI-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; CISI-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; CISI-NEXT: v_cvt_u32_f32_e32 v1, v1 ; CISI-NEXT: v_cvt_u32_f32_e32 v0, v0 ; CISI-NEXT: v_mul_lo_u32 v2, s0, v1 @@ -1954,12 +1954,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; VI-NEXT: v_cvt_f32_u32_e32 v1, s3 ; VI-NEXT: s_sub_u32 s8, 0, s2 ; VI-NEXT: s_subb_u32 s9, 0, s3 -; VI-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; VI-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; VI-NEXT: v_rcp_f32_e32 v0, v0 ; VI-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; VI-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; VI-NEXT: v_trunc_f32_e32 v1, v1 -; VI-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; VI-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; VI-NEXT: v_cvt_u32_f32_e32 v4, v1 ; VI-NEXT: v_cvt_u32_f32_e32 v5, v0 ; VI-NEXT: v_mul_lo_u32 v2, s8, v4 @@ -2111,12 +2111,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s3 ; GFX9-NEXT: s_sub_u32 s0, 0, s2 ; GFX9-NEXT: s_subb_u32 s1, 0, s3 -; GFX9-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GFX9-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GFX9-NEXT: v_rcp_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX9-NEXT: v_trunc_f32_e32 v1, v1 -; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX9-NEXT: v_readfirstlane_b32 s10, v1 @@ -2279,12 +2279,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX1010-NEXT: v_cvt_f32_u32_e32 v1, s3 ; GFX1010-NEXT: s_sub_u32 s9, 0, s2 ; GFX1010-NEXT: s_subb_u32 s10, 0, s3 -; GFX1010-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GFX1010-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GFX1010-NEXT: v_rcp_f32_e32 v0, v0 ; GFX1010-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX1010-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX1010-NEXT: v_trunc_f32_e32 v1, v1 -; GFX1010-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX1010-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX1010-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX1010-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX1010-NEXT: v_readfirstlane_b32 s0, v1 @@ -2441,12 +2441,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX1030W32-NEXT: v_cvt_f32_u32_e32 v1, s3 ; GFX1030W32-NEXT: s_sub_u32 s9, 0, s2 ; GFX1030W32-NEXT: s_subb_u32 s10, 0, s3 -; GFX1030W32-NEXT: v_fmac_f32_e32 v0, 0x4f800000, v1 +; GFX1030W32-NEXT: v_fmamk_f32 v0, v1, 0x4f800000, v0 ; GFX1030W32-NEXT: v_rcp_f32_e32 v0, v0 ; GFX1030W32-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX1030W32-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX1030W32-NEXT: v_trunc_f32_e32 v1, v1 -; GFX1030W32-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1 +; GFX1030W32-NEXT: v_fmamk_f32 v0, v1, 0xcf800000, v0 ; GFX1030W32-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX1030W32-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX1030W32-NEXT: v_readfirstlane_b32 s0, v1 @@ -2603,12 +2603,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX1030W64-NEXT: v_cvt_f32_u32_e32 v1, s3 ; GFX1030W64-NEXT: s_sub_u32 s9, 0, s2 ; GFX1030W64-NEXT: s_subb_u32 s10, 0, s3 -; GFX1030W64-NEXT: v_fmac_f32_e32 v0, 0x4f800000, v1 +; GFX1030W64-NEXT: v_fmamk_f32 v0, v1, 0x4f800000, v0 ; GFX1030W64-NEXT: v_rcp_f32_e32 v0, v0 ; GFX1030W64-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX1030W64-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX1030W64-NEXT: v_trunc_f32_e32 v1, v1 -; GFX1030W64-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1 +; GFX1030W64-NEXT: v_fmamk_f32 v0, v1, 0xcf800000, v0 ; GFX1030W64-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX1030W64-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX1030W64-NEXT: v_readfirstlane_b32 s8, v1 @@ -2766,7 +2766,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX11-NEXT: s_sub_u32 s9, 0, s2 ; GFX11-NEXT: s_subb_u32 s10, 0, s3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_fmac_f32_e32 v0, 0x4f800000, v1 +; GFX11-NEXT: v_fmamk_f32 v0, v1, 0x4f800000, v0 ; GFX11-NEXT: v_rcp_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 @@ -2774,7 +2774,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX11-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX11-NEXT: v_trunc_f32_e32 v1, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1 +; GFX11-NEXT: v_fmamk_f32 v0, v1, 0xcf800000, v0 ; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll index fe649d4333041..9fb0cab068d28 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll @@ -7,6 +7,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 { ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: v_mov_b32_e32 v4, 0 +; GFX10-NEXT: v_mov_b32_e32 v7, 0x3ca3d70a ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D @@ -36,34 +37,33 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 { ; GFX10-NEXT: v_fma_f32 v1, v1, v5, s28 ; GFX10-NEXT: v_max_f32_e64 v6, s0, s0 clamp ; GFX10-NEXT: v_add_f32_e64 v5, s29, -1.0 -; GFX10-NEXT: v_sub_f32_e32 v8, s0, v1 -; GFX10-NEXT: v_fma_f32 v7, -s2, v6, s6 +; GFX10-NEXT: v_sub_f32_e32 v9, s0, v1 +; GFX10-NEXT: v_fma_f32 v8, -s2, v6, s6 ; GFX10-NEXT: v_fma_f32 v5, v6, v5, 1.0 -; GFX10-NEXT: v_mad_f32 v10, s2, v6, v2 -; GFX10-NEXT: s_mov_b32 s0, 0x3c23d70a -; GFX10-NEXT: v_fmac_f32_e32 v1, v6, v8 -; GFX10-NEXT: v_fmac_f32_e32 v10, v7, v6 +; GFX10-NEXT: v_mad_f32 v11, s2, v6, v2 +; GFX10-NEXT: v_fmac_f32_e32 v1, v6, v9 +; GFX10-NEXT: v_fmac_f32_e32 v11, v8, v6 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mul_f32_e32 v9, s10, v0 +; GFX10-NEXT: v_mul_f32_e32 v10, s10, v0 ; GFX10-NEXT: v_fma_f32 v0, -v0, s10, s14 -; GFX10-NEXT: v_mul_f32_e32 v8, s18, v2 +; GFX10-NEXT: v_mul_f32_e32 v9, s18, v2 ; GFX10-NEXT: v_mul_f32_e32 v3, s22, v3 -; GFX10-NEXT: v_fmac_f32_e32 v9, v0, v6 +; GFX10-NEXT: v_fmac_f32_e32 v10, v0, v6 ; GFX10-NEXT: v_sub_f32_e32 v0, v1, v5 -; GFX10-NEXT: v_mul_f32_e32 v1, v8, v6 -; GFX10-NEXT: v_mul_f32_e32 v7, v6, v3 -; GFX10-NEXT: v_fma_f32 v3, -v6, v3, v9 +; GFX10-NEXT: v_mul_f32_e32 v1, v9, v6 +; GFX10-NEXT: v_mul_f32_e32 v8, v6, v3 +; GFX10-NEXT: v_fma_f32 v3, -v6, v3, v10 ; GFX10-NEXT: v_fmac_f32_e32 v5, v0, v6 ; GFX10-NEXT: v_fma_f32 v0, v2, s26, -v1 -; GFX10-NEXT: v_fmac_f32_e32 v7, v3, v6 +; GFX10-NEXT: v_fmac_f32_e32 v8, v3, v6 ; GFX10-NEXT: v_fmac_f32_e32 v1, v0, v6 ; GFX10-NEXT: v_mul_f32_e32 v0, v2, v6 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_f32_e32 v4, v4, v10 +; GFX10-NEXT: v_add_f32_e32 v4, v4, v11 ; GFX10-NEXT: v_mul_f32_e32 v3, v4, v6 -; GFX10-NEXT: v_fmaak_f32 v4, s0, v5, 0x3ca3d70a +; GFX10-NEXT: v_fmamk_f32 v4, v5, 0x3c23d70a, v7 ; GFX10-NEXT: v_mul_f32_e32 v1, v3, v1 -; GFX10-NEXT: v_mul_f32_e32 v2, v7, v4 +; GFX10-NEXT: v_mul_f32_e32 v2, v8, v4 ; GFX10-NEXT: v_fmac_f32_e32 v1, v2, v0 ; GFX10-NEXT: v_max_f32_e32 v0, 0, v1 ; GFX10-NEXT: ; return to shader part epilog @@ -71,7 +71,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 { ; GFX11-LABEL: _amdgpu_ps_main: ; GFX11: ; %bb.0: ; %.entry ; GFX11-NEXT: image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D -; GFX11-NEXT: v_mov_b32_e32 v4, 0 +; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v7, 0x3ca3d70a ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D @@ -96,43 +96,40 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 { ; GFX11-NEXT: s_buffer_load_b128 s[20:23], s[0:3], 0x70 ; GFX11-NEXT: v_fma_f32 v1, v1, v5, s28 ; GFX11-NEXT: v_max_f32_e64 v6, s0, s0 clamp -; GFX11-NEXT: s_buffer_load_b128 s[24:27], s[0:3], 0x10 ; GFX11-NEXT: v_add_f32_e64 v5, s29, -1.0 +; GFX11-NEXT: s_buffer_load_b128 s[24:27], s[0:3], 0x10 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_sub_f32_e32 v8, s0, v1 -; GFX11-NEXT: v_fma_f32 v7, -s2, v6, s6 -; GFX11-NEXT: v_fma_f32 v10, s2, v6, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) +; GFX11-NEXT: v_sub_f32_e32 v9, s0, v1 +; GFX11-NEXT: v_fma_f32 v8, -s2, v6, s6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_fma_f32 v5, v6, v5, 1.0 -; GFX11-NEXT: s_mov_b32 s0, 0x3c23d70a +; GFX11-NEXT: v_fma_f32 v11, s2, v6, v2 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: v_mul_f32_e32 v9, s10, v0 +; GFX11-NEXT: v_mul_f32_e32 v10, s10, v0 ; GFX11-NEXT: v_fma_f32 v0, -v0, s10, s14 -; GFX11-NEXT: v_mul_f32_e32 v3, s22, v3 -; GFX11-NEXT: v_dual_fmac_f32 v1, v6, v8 :: v_dual_mul_f32 v8, s18, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_fmac_f32_e32 v9, v0, v6 -; GFX11-NEXT: v_dual_fmac_f32 v10, v7, v6 :: v_dual_mul_f32 v7, v6, v3 +; GFX11-NEXT: v_fmac_f32_e32 v1, v6, v9 +; GFX11-NEXT: v_mul_f32_e32 v9, s18, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_fmac_f32_e32 v10, v0, v6 ; GFX11-NEXT: v_sub_f32_e32 v0, v1, v5 -; GFX11-NEXT: v_fma_f32 v3, -v6, v3, v9 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_fmac_f32_e32 v7, v3, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fmac_f32_e32 v5, v0, v6 -; GFX11-NEXT: v_mul_f32_e32 v1, v8, v6 +; GFX11-NEXT: v_mul_f32_e32 v3, s22, v3 +; GFX11-NEXT: v_dual_fmac_f32 v11, v8, v6 :: v_dual_mul_f32 v8, v6, v3 +; GFX11-NEXT: v_mul_f32_e32 v1, v9, v6 +; GFX11-NEXT: v_fma_f32 v3, -v6, v3, v10 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_add_f32_e32 v4, v4, v10 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_dual_mul_f32 v3, v4, v6 :: v_dual_fmaak_f32 v4, s0, v5, 0x3ca3d70a +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_add_f32_e32 v4, v4, v11 ; GFX11-NEXT: v_fma_f32 v0, v2, s26, -v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fmac_f32_e32 v1, v0, v6 ; GFX11-NEXT: v_mul_f32_e32 v0, v2, v6 -; GFX11-NEXT: v_mul_f32_e32 v2, v7, v4 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_mul_f32_e32 v1, v3, v1 +; GFX11-NEXT: v_fmac_f32_e32 v8, v3, v6 +; GFX11-NEXT: v_dual_mul_f32 v3, v4, v6 :: v_dual_fmamk_f32 v4, v5, 0x3c23d70a, v7 +; GFX11-NEXT: v_dual_mul_f32 v1, v3, v1 :: v_dual_mul_f32 v2, v8, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_fmac_f32_e32 v1, v2, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_max_f32_e32 v0, 0, v1 ; GFX11-NEXT: ; return to shader part epilog .entry: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll index 226670a550014..d4ad53291070d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll @@ -33,7 +33,7 @@ define amdgpu_kernel void @mad_f16_imm_a( } ; GCN-LABEL: {{^}}mad_f16_imm_b: -; GCN: v_mac_f16_e32 {{v[0-9]+}}, 0x4800, {{v[0-9]+$}} +; GCN: v_madmk_f16 {{v[0-9]+}}, {{v[0-9]+}}, 0x4800, {{v[0-9]+$}} define amdgpu_kernel void @mad_f16_imm_b( ptr addrspace(1) %r, ptr addrspace(1) %a, diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll index f90d338ffc487..1926d0f8f0d6f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll @@ -256,8 +256,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_a( ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-NEXT: v_mac_f32_e32 v1, 0x40400000, v0 -; SI-NEXT: v_cvt_f16_f32_e32 v0, v1 +; SI-NEXT: v_madmk_f32 v0, v0, 0x40400000, v1 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -280,8 +280,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_a( ; VI-FLUSH-NEXT: s_waitcnt vmcnt(0) ; VI-FLUSH-NEXT: s_mov_b32 s0, s4 ; VI-FLUSH-NEXT: s_mov_b32 s1, s5 -; VI-FLUSH-NEXT: v_mac_f16_e32 v1, 0x4200, v0 -; VI-FLUSH-NEXT: buffer_store_short v1, off, s[0:3], 0 +; VI-FLUSH-NEXT: v_madmk_f16 v0, v0, 0x4200, v1 +; VI-FLUSH-NEXT: buffer_store_short v0, off, s[0:3], 0 ; VI-FLUSH-NEXT: s_endpgm ; ; VI-DENORM-LABEL: fmuladd_f16_imm_a: @@ -353,8 +353,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_a( ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) ; GFX10-DENORM-NEXT: s_mov_b32 s0, s4 ; GFX10-DENORM-NEXT: s_mov_b32 s1, s5 -; GFX10-DENORM-NEXT: v_fmac_f16_e32 v1, 0x4200, v0 -; GFX10-DENORM-NEXT: buffer_store_short v1, off, s[0:3], 0 +; GFX10-DENORM-NEXT: v_fmamk_f16 v0, v0, 0x4200, v1 +; GFX10-DENORM-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX10-DENORM-NEXT: s_endpgm ; ; GFX11-FLUSH-LABEL: fmuladd_f16_imm_a: @@ -442,8 +442,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_b( ; SI-NEXT: s_mov_b32 s1, s5 ; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 -; SI-NEXT: v_mac_f32_e32 v1, 0x40400000, v0 -; SI-NEXT: v_cvt_f16_f32_e32 v0, v1 +; SI-NEXT: v_madmk_f32 v0, v0, 0x40400000, v1 +; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; @@ -466,8 +466,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_b( ; VI-FLUSH-NEXT: s_waitcnt vmcnt(0) ; VI-FLUSH-NEXT: s_mov_b32 s0, s4 ; VI-FLUSH-NEXT: s_mov_b32 s1, s5 -; VI-FLUSH-NEXT: v_mac_f16_e32 v1, 0x4200, v0 -; VI-FLUSH-NEXT: buffer_store_short v1, off, s[0:3], 0 +; VI-FLUSH-NEXT: v_madmk_f16 v0, v0, 0x4200, v1 +; VI-FLUSH-NEXT: buffer_store_short v0, off, s[0:3], 0 ; VI-FLUSH-NEXT: s_endpgm ; ; VI-DENORM-LABEL: fmuladd_f16_imm_b: @@ -539,8 +539,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_b( ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) ; GFX10-DENORM-NEXT: s_mov_b32 s0, s4 ; GFX10-DENORM-NEXT: s_mov_b32 s1, s5 -; GFX10-DENORM-NEXT: v_fmac_f16_e32 v1, 0x4200, v0 -; GFX10-DENORM-NEXT: buffer_store_short v1, off, s[0:3], 0 +; GFX10-DENORM-NEXT: v_fmamk_f16 v0, v0, 0x4200, v1 +; GFX10-DENORM-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX10-DENORM-NEXT: s_endpgm ; ; GFX11-FLUSH-LABEL: fmuladd_f16_imm_b: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll index f55242a8726be..528232a203acf 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -203,7 +203,7 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -2269,7 +2269,7 @@ define float @v_log_f32(float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -2472,7 +2472,7 @@ define float @v_log_fabs_f32(float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -2675,7 +2675,7 @@ define float @v_log_fneg_fabs_f32(float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -2878,7 +2878,7 @@ define float @v_log_fneg_f32(float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -3015,9 +3015,7 @@ define float @v_log_f32_fast(float %in) { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_fast: @@ -3135,9 +3133,7 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_unsafe_math_attr: @@ -3255,9 +3251,7 @@ define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_approx_fn_attr: @@ -3441,7 +3435,7 @@ define float @v_log_f32_ninf(float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -3577,9 +3571,7 @@ define float @v_log_f32_afn(float %in) { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_afn: @@ -3726,9 +3718,7 @@ define float @v_log_f32_afn_dynamic(float %in) #1 { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_afn_dynamic: @@ -3844,11 +3834,10 @@ define float @v_fabs_log_f32_afn(float %in) { ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, s0 ; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0 -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_fabs_log_f32_afn: @@ -3982,7 +3971,7 @@ define float @v_log_f32_daz(float %in) #0 { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -4174,7 +4163,7 @@ define float @v_log_f32_nnan(float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -4324,7 +4313,7 @@ define float @v_log_f32_nnan_daz(float %in) #0 { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -4516,7 +4505,7 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -4666,7 +4655,7 @@ define float @v_log_f32_ninf_daz(float %in) #0 { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -4858,7 +4847,7 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -5041,9 +5030,9 @@ define float @v_log_f32_nnan_ninf(float %in) { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -5152,9 +5141,9 @@ define float @v_log_f32_nnan_ninf_daz(float %in) #0 { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf_daz: @@ -5322,9 +5311,9 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -5548,7 +5537,7 @@ define float @v_log_f32_dynamic_mode(float %in) #1 { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -5722,7 +5711,7 @@ define float @v_log_f32_undef() { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -5887,7 +5876,7 @@ define float @v_log_f32_0() { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -6041,7 +6030,7 @@ define float @v_log_f32_from_fpext_f16(i16 %src.i) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -6212,7 +6201,7 @@ define float @v_log_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -6390,7 +6379,7 @@ define float @v_log_f32_from_fpext_bf16(bfloat %src) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll index ab6325216c06d..2e5bf2e560951 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -203,7 +203,7 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -2269,7 +2269,7 @@ define float @v_log10_f32(float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -2472,7 +2472,7 @@ define float @v_log10_fabs_f32(float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -2675,7 +2675,7 @@ define float @v_log10_fneg_fabs_f32(float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -2878,7 +2878,7 @@ define float @v_log10_fneg_f32(float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -3015,9 +3015,7 @@ define float @v_log10_f32_fast(float %in) { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_fast: @@ -3135,9 +3133,7 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_unsafe_math_attr: @@ -3255,9 +3251,7 @@ define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_approx_fn_attr: @@ -3441,7 +3435,7 @@ define float @v_log10_f32_ninf(float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -3577,9 +3571,7 @@ define float @v_log10_f32_afn(float %in) { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_afn: @@ -3726,9 +3718,7 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_afn_dynamic: @@ -3844,11 +3834,10 @@ define float @v_fabs_log10_f32_afn(float %in) { ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, s0 ; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0 -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_fabs_log10_f32_afn: @@ -3982,7 +3971,7 @@ define float @v_log10_f32_daz(float %in) #0 { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -4174,7 +4163,7 @@ define float @v_log10_f32_nnan(float %in) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -4324,7 +4313,7 @@ define float @v_log10_f32_nnan_daz(float %in) #0 { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -4516,7 +4505,7 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -4666,7 +4655,7 @@ define float @v_log10_f32_ninf_daz(float %in) #0 { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -4858,7 +4847,7 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -5041,9 +5030,9 @@ define float @v_log10_f32_nnan_ninf(float %in) { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -5152,9 +5141,9 @@ define float @v_log10_f32_nnan_ninf_daz(float %in) #0 { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf_daz: @@ -5322,9 +5311,9 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] @@ -5548,7 +5537,7 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 @@ -5722,7 +5711,7 @@ define float @v_log10_f32_undef() { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -5887,7 +5876,7 @@ define float @v_log10_f32_0() { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -6041,7 +6030,7 @@ define float @v_log10_f32_from_fpext_f16(i16 %src.i) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -6212,7 +6201,7 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo @@ -6390,7 +6379,7 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) { ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll index 51a0a50fbbff5..00e226291e68b 100644 --- a/llvm/test/CodeGen/AMDGPU/madmk.ll +++ b/llvm/test/CodeGen/AMDGPU/madmk.ll @@ -11,7 +11,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone ; GCN-LABEL: {{^}}madmk_f32: ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}} ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; GCN: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]] +; GCN: v_madmk_f32 {{v[0-9]+}}, [[VA]], 0x41200000, [[VB]] define amdgpu_kernel void @madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid @@ -96,7 +96,7 @@ define amdgpu_kernel void @s_s_madmk_f32(ptr addrspace(1) noalias %out, [8 x i32 ; GCN-DAG: s_load_dword [[SREG:s[0-9]+]] ; GCN-DAG: buffer_load_dword [[VREG1:v[0-9]+]] ; GCN: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG]] -; GCN: v_mac_f32_e32 [[VREG2]], 0x41200000, [[VREG1]] +; GCN: v_madmk_f32 {{v[0-9]+}}, [[VREG1]], 0x41200000, [[VREG2]] ; GCN: s_endpgm define amdgpu_kernel void @v_s_madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, float %b) #0 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone @@ -171,8 +171,9 @@ define amdgpu_kernel void @no_madmk_src2_modifier_f32(ptr addrspace(1) noalias % ; GCN-LABEL: {{^}}madmk_add_inline_imm_f32: ; GCN: buffer_load_dword [[A:v[0-9]+]] -; GCN: s_mov_b32 [[SK:s[0-9]+]], 0x41200000 -; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[SK]], 2.0 +; GCN: v_mov_b32_e32 [[B:v[0-9]+]], 2.0 +; GCN: v_madmk_f32 {{v[0-9]+}}, [[A]], 0x41200000, [[B]] + define amdgpu_kernel void @madmk_add_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 { %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid diff --git a/llvm/test/CodeGen/AMDGPU/operand-folding.ll b/llvm/test/CodeGen/AMDGPU/operand-folding.ll index a399b509014dd..b54cc27db1d12 100644 --- a/llvm/test/CodeGen/AMDGPU/operand-folding.ll +++ b/llvm/test/CodeGen/AMDGPU/operand-folding.ll @@ -112,8 +112,8 @@ entry: ; A subregister use operand should not be tied. ; CHECK-LABEL: {{^}}no_fold_tied_subregister: ; CHECK: buffer_load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]] -; CHECK: v_mac_f32_e32 v[[LO]], 0x41200000, v[[HI]] -; CHECK: buffer_store_dword v[[LO]] +; CHECK: v_madmk_f32 v[[RES:[0-9]+]], v[[HI]], 0x41200000, v[[LO]] +; CHECK: buffer_store_dword v[[RES]] define amdgpu_kernel void @no_fold_tied_subregister() #1 { %tmp1 = load volatile <2 x float>, ptr addrspace(1) undef %tmp2 = extractelement <2 x float> %tmp1, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll index 705a2af739590..4f2fd3f50494c 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -19,14 +19,14 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-NEXT: s_sub_u32 s4, 0, s10 ; GCN-NEXT: s_subb_u32 s5, 0, s11 ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_ashr_i32 s12, s3, 31 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: s_add_u32 s2, s2, s12 @@ -247,12 +247,12 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) { ; GCN-NEXT: v_cvt_f32_u32_e32 v6, v2 ; GCN-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; GCN-NEXT: v_subb_u32_e32 v8, vcc, 0, v2, vcc -; GCN-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; GCN-NEXT: v_madmk_f32 v5, v6, 0x4f800000, v5 ; GCN-NEXT: v_rcp_f32_e32 v5, v5 ; GCN-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 ; GCN-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 ; GCN-NEXT: v_trunc_f32_e32 v6, v6 -; GCN-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; GCN-NEXT: v_madmk_f32 v5, v6, 0xcf800000, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GCN-NEXT: v_mul_hi_u32 v9, v7, v5 @@ -1093,12 +1093,12 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: v_cvt_f32_u32_e32 v1, s3 ; GCN-NEXT: s_sub_u32 s4, 0, s2 ; GCN-NEXT: s_subb_u32 s5, 0, s3 -; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: v_mul_lo_u32 v2, s4, v1 @@ -1287,12 +1287,12 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) { ; GCN-NEXT: v_cvt_f32_u32_e32 v4, v1 ; GCN-NEXT: v_sub_i32_e32 v5, vcc, 0, v0 ; GCN-NEXT: v_subb_u32_e32 v6, vcc, 0, v1, vcc -; GCN-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 +; GCN-NEXT: v_madmk_f32 v3, v4, 0x4f800000, v3 ; GCN-NEXT: v_rcp_f32_e32 v3, v3 ; GCN-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 ; GCN-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 ; GCN-NEXT: v_trunc_f32_e32 v4, v4 -; GCN-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 +; GCN-NEXT: v_madmk_f32 v3, v4, 0xcf800000, v3 ; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4 ; GCN-NEXT: v_mul_hi_u32 v7, v5, v3 @@ -1484,12 +1484,12 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_cvt_f32_u32_e32 v4, v1 ; GCN-NEXT: v_sub_i32_e32 v5, vcc, 0, v0 ; GCN-NEXT: v_subb_u32_e32 v6, vcc, 0, v1, vcc -; GCN-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4 +; GCN-NEXT: v_madmk_f32 v3, v4, 0x4f800000, v3 ; GCN-NEXT: v_rcp_f32_e32 v3, v3 ; GCN-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 ; GCN-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 ; GCN-NEXT: v_trunc_f32_e32 v4, v4 -; GCN-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 +; GCN-NEXT: v_madmk_f32 v3, v4, 0xcf800000, v3 ; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4 ; GCN-NEXT: v_mul_hi_u32 v7, v5, v3 diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll index 613349f32e2d5..24319a639da44 100644 --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -15,13 +15,13 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-NEXT: s_sub_u32 s0, 0, s12 ; GCN-NEXT: s_subb_u32 s1, 0, s13 ; GCN-NEXT: s_mov_b32 s4, s8 -; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 ; GCN-NEXT: s_mov_b32 s5, s9 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: v_mul_lo_u32 v2, s0, v1 @@ -226,12 +226,12 @@ define i64 @v_test_srem(i64 %x, i64 %y) { ; GCN-NEXT: v_cvt_f32_u32_e32 v5, v2 ; GCN-NEXT: v_sub_i32_e32 v6, vcc, 0, v3 ; GCN-NEXT: v_subb_u32_e32 v7, vcc, 0, v2, vcc -; GCN-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GCN-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4 ; GCN-NEXT: v_rcp_f32_e32 v4, v4 ; GCN-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GCN-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; GCN-NEXT: v_trunc_f32_e32 v5, v5 -; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; GCN-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4 ; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GCN-NEXT: v_mul_hi_u32 v8, v6, v4 @@ -894,7 +894,7 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-NEXT: s_sub_u32 s0, 0, s12 ; GCN-NEXT: s_subb_u32 s1, 0, s13 ; GCN-NEXT: s_ashr_i32 s6, s7, 31 -; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 ; GCN-NEXT: s_mov_b32 s7, s6 ; GCN-NEXT: s_mov_b32 s8, s4 @@ -902,7 +902,7 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: v_mul_lo_u32 v2, s0, v1 @@ -1290,13 +1290,13 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: s_sub_u32 s2, 0, s8 ; GCN-NEXT: s_subb_u32 s3, 0, s9 ; GCN-NEXT: s_mov_b32 s4, s0 -; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 ; GCN-NEXT: s_mov_b32 s5, s1 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: v_mul_lo_u32 v2, s2, v1 @@ -1481,12 +1481,12 @@ define i64 @v_test_srem_k_num_i64(i64 %x) { ; GCN-NEXT: v_cvt_f32_u32_e32 v3, v1 ; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v0 ; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc -; GCN-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 ; GCN-NEXT: v_rcp_f32_e32 v2, v2 ; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GCN-NEXT: v_trunc_f32_e32 v3, v3 -; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GCN-NEXT: v_mul_hi_u32 v6, v4, v2 @@ -1676,12 +1676,12 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_cvt_f32_u32_e32 v3, v1 ; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v0 ; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc -; GCN-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 ; GCN-NEXT: v_rcp_f32_e32 v2, v2 ; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GCN-NEXT: v_trunc_f32_e32 v3, v3 -; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GCN-NEXT: v_mul_hi_u32 v6, v4, v2 diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll index cf30131b8ab58..012b3f976734d 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv.ll @@ -2527,7 +2527,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; SI-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; SI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; SI-NEXT: v_trunc_f32_e32 v3, v3 -; SI-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; SI-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 ; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 ; SI-NEXT: v_cvt_u32_f32_e32 v3, v3 ; SI-NEXT: v_mul_hi_u32 v4, v2, s4 @@ -2626,7 +2626,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; VI-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; VI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; VI-NEXT: v_trunc_f32_e32 v3, v3 -; VI-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; VI-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 ; VI-NEXT: v_cvt_u32_f32_e32 v6, v2 ; VI-NEXT: v_cvt_u32_f32_e32 v7, v3 ; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0 @@ -2713,7 +2713,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GCN-NEXT: v_trunc_f32_e32 v3, v3 -; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v6, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v7, v3 ; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0 diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index c5ab44e31c032..e23f3cfad89bc 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -14,12 +14,12 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-NEXT: v_cvt_f32_u32_e32 v1, s9 ; GCN-NEXT: s_sub_u32 s4, 0, s8 ; GCN-NEXT: s_subb_u32 s5, 0, s9 -; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: v_mul_lo_u32 v2, s4, v1 @@ -211,12 +211,12 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) { ; GCN-NEXT: v_cvt_f32_u32_e32 v5, v3 ; GCN-NEXT: v_sub_i32_e32 v6, vcc, 0, v2 ; GCN-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc -; GCN-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GCN-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4 ; GCN-NEXT: v_rcp_f32_e32 v4, v4 ; GCN-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GCN-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; GCN-NEXT: v_trunc_f32_e32 v5, v5 -; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; GCN-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4 ; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4 ; GCN-NEXT: v_mul_lo_u32 v8, v6, v5 @@ -688,7 +688,7 @@ define amdgpu_kernel void @s_test_udiv24_i48(ptr addrspace(1) %out, i48 %x, i48 ; GCN-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 ; GCN-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 ; GCN-NEXT: v_trunc_f32_e32 v2, v2 -; GCN-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2 +; GCN-NEXT: v_madmk_f32 v1, v2, 0xcf800000, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: s_mov_b32 s2, -1 @@ -886,12 +886,12 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: v_cvt_f32_u32_e32 v1, s3 ; GCN-NEXT: s_sub_u32 s4, 0, s2 ; GCN-NEXT: s_subb_u32 s5, 0, s3 -; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: v_mul_lo_u32 v2, s4, v1 @@ -1067,12 +1067,12 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_cvt_f32_u32_e32 v3, v1 ; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v0 ; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc -; GCN-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 ; GCN-NEXT: v_rcp_f32_e32 v2, v2 ; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GCN-NEXT: v_trunc_f32_e32 v3, v3 -; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GCN-NEXT: v_mul_lo_u32 v6, v4, v3 @@ -1335,7 +1335,7 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: s_mov_b32 s2, -1 @@ -1509,7 +1509,7 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GCN-NEXT: v_trunc_f32_e32 v3, v3 -; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GCN-NEXT: v_mul_hi_u32 v4, v2, s4 diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index 894c96acbbcd6..f68d14a32b929 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -15,13 +15,13 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-NEXT: s_sub_u32 s0, 0, s12 ; GCN-NEXT: s_subb_u32 s1, 0, s13 ; GCN-NEXT: s_mov_b32 s4, s8 -; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 ; GCN-NEXT: s_mov_b32 s5, s9 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: v_mul_lo_u32 v2, s0, v1 @@ -221,12 +221,12 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) { ; GCN-NEXT: v_cvt_f32_u32_e32 v5, v3 ; GCN-NEXT: v_sub_i32_e32 v6, vcc, 0, v2 ; GCN-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc -; GCN-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; GCN-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4 ; GCN-NEXT: v_rcp_f32_e32 v4, v4 ; GCN-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 ; GCN-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 ; GCN-NEXT: v_trunc_f32_e32 v5, v5 -; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 +; GCN-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4 ; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4 ; GCN-NEXT: v_mul_lo_u32 v8, v6, v5 @@ -716,13 +716,13 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: s_sub_u32 s0, 0, s6 ; GCN-NEXT: s_subb_u32 s1, 0, s7 ; GCN-NEXT: s_mov_b32 s8, s4 -; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 ; GCN-NEXT: s_mov_b32 s9, s5 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: v_mul_lo_u32 v2, s0, v1 @@ -903,7 +903,7 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 -; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -1086,12 +1086,12 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_cvt_f32_u32_e32 v3, v1 ; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v0 ; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc -; GCN-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 +; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 ; GCN-NEXT: v_rcp_f32_e32 v2, v2 ; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GCN-NEXT: v_trunc_f32_e32 v3, v3 -; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 +; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GCN-NEXT: v_mul_lo_u32 v6, v4, v3 diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll index 3df7f3c26aad8..2b5762e1fa2a5 100644 --- a/llvm/test/CodeGen/AMDGPU/v_mac.ll +++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll @@ -253,7 +253,7 @@ bb: ; SI: v_add_f32_e32 [[TMP2:v[0-9]+]], [[CVT_A]], [[CVT_A]] ; SI: v_mad_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0 -; SI: v_mac_f32_e32 v{{[0-9]+}}, 0x41000000, v{{[0-9]+}} +; SI: v_madmk_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0x41000000, v{{[0-9]+}} ; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]] ; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0 diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index 94b822ac48875..cadc23414dcac 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -743,12 +743,12 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 { ; GFX1032-NEXT: v_cvt_f32_u32_e32 v1, s5 ; GFX1032-NEXT: s_sub_u32 s9, 0, s4 ; GFX1032-NEXT: s_subb_u32 s10, 0, s5 -; GFX1032-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GFX1032-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GFX1032-NEXT: v_rcp_f32_e32 v0, v0 ; GFX1032-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX1032-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX1032-NEXT: v_trunc_f32_e32 v1, v1 -; GFX1032-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX1032-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX1032-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX1032-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX1032-NEXT: v_readfirstlane_b32 s0, v1 @@ -905,12 +905,12 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 { ; GFX1064-NEXT: v_cvt_f32_u32_e32 v1, s5 ; GFX1064-NEXT: s_sub_u32 s9, 0, s4 ; GFX1064-NEXT: s_subb_u32 s10, 0, s5 -; GFX1064-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1 +; GFX1064-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GFX1064-NEXT: v_rcp_f32_e32 v0, v0 ; GFX1064-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX1064-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX1064-NEXT: v_trunc_f32_e32 v1, v1 -; GFX1064-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1 +; GFX1064-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 ; GFX1064-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX1064-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX1064-NEXT: v_readfirstlane_b32 s8, v1