diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f326416a32417..48f337e192e60 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -903,6 +903,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
                        ISD::FADD,
                        ISD::FSUB,
                        ISD::FDIV,
+                       ISD::FMUL,
                        ISD::FMINNUM,
                        ISD::FMAXNUM,
                        ISD::FMINNUM_IEEE,
@@ -14595,6 +14596,73 @@ SDValue SITargetLowering::performFDivCombine(SDNode *N,
   return SDValue();
 }
 
+SDValue SITargetLowering::performFMulCombine(SDNode *N,
+                                             DAGCombinerInfo &DCI) const {
+  // Fold a multiply by a select of two same-signed power-of-two constants
+  // into an ldexp whose exponent operand is an i32 select. i32 inline
+  // constants are cheaper to realize than materialized f16 or f64 (or
+  // non-inline f32) values.
+  //
+  // Given A = 2^a and B = 2^b, where a and b are integers:
+  //   fmul x, (select y,  A,  B) -> ldexp(x, (select i32 y, a, b))
+  //   fmul x, (select y, -A, -B) -> ldexp((fneg x), (select i32 y, a, b))
+  EVT VT = N->getValueType(0);
+  EVT ScalarVT = VT.getScalarType();
+  EVT IntVT = VT.changeElementType(MVT::i32);
+
+  const bool IsF32 = ScalarVT == MVT::f32;
+  const bool IsCandidateType =
+      ScalarVT == MVT::f64 || IsF32 || ScalarVT == MVT::f16;
+  if (!IsCandidateType)
+    return SDValue();
+
+  // Only a single-use select on the right-hand side is considered.
+  SDValue Sel = N->getOperand(1);
+  if (!Sel.hasOneUse() || Sel.getOpcode() != ISD::SELECT)
+    return SDValue();
+
+  const ConstantFPSDNode *TrueC = isConstOrConstSplatFP(Sel.getOperand(1));
+  if (!TrueC)
+    return SDValue();
+  const ConstantFPSDNode *FalseC = isConstOrConstSplatFP(Sel.getOperand(2));
+  if (!FalseC)
+    return SDValue();
+
+  // Both arms must share a sign so that one fneg of x covers either choice.
+  const bool NegateSrc = TrueC->isNegative();
+  if (NegateSrc != FalseC->isNegative())
+    return SDValue();
+
+  const APFloat &TrueVal = TrueC->getValueAPF();
+  const APFloat &FalseVal = FalseC->getValueAPF();
+
+  // For f32, only non-inline constants should be transformed.
+  const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+  if (IsF32 && TII->isInlineConstant(TrueVal) &&
+      TII->isInlineConstant(FalseVal))
+    return SDValue();
+
+  // An INT_MIN result from getExactLog2Abs() rejects the fold.
+  const int TrueExp = TrueVal.getExactLog2Abs();
+  if (TrueExp == INT_MIN)
+    return SDValue();
+  const int FalseExp = FalseVal.getExactLog2Abs();
+  if (FalseExp == INT_MIN)
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc SL(N);
+  SDValue ExpSel = DAG.getNode(ISD::SELECT, SL, IntVT, Sel.getOperand(0),
+                               DAG.getSignedConstant(TrueExp, SL, IntVT),
+                               DAG.getSignedConstant(FalseExp, SL, IntVT));
+
+  SDValue Src = N->getOperand(0);
+  if (NegateSrc)
+    Src = DAG.getNode(ISD::FNEG, SL, VT, Src, Src->getFlags());
+
+  return DAG.getNode(ISD::FLDEXP, SL, VT, Src, ExpSel, N->getFlags());
+}
+
 SDValue SITargetLowering::performFMACombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -14881,6 +14949,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
     return performFSubCombine(N, DCI);
   case ISD::FDIV:
     return performFDivCombine(N, DCI);
+  case ISD::FMUL:
+    return performFMulCombine(N, DCI);
   case ISD::SETCC:
     return performSetCCCombine(N, DCI);
   case ISD::FMAXNUM:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 32e110fdfa84d..631f26542bbe6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -218,6 +218,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performFDivCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performFMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performFMACombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue
performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const; diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll index 9d0d85da9f7fa..25b6b7be1f3b5 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll @@ -82,10 +82,10 @@ define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) { ; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1 ; CHECK-NEXT: s_mov_b32 s4, 0x800000 ; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; CHECK-NEXT: v_cvt_i32_f32_e32 v1, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc -; CHECK-NEXT: v_mul_f32_e64 v3, |v0|, v3 +; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3 ; CHECK-NEXT: v_log_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1 ; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000 @@ -98,10 +98,10 @@ define float @test_pow_fast_f32__integral_y(float %x, i32 %y.i) { ; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc ; CHECK-NEXT: v_fma_f32 v2, v2, v4, v3 ; CHECK-NEXT: v_exp_f32_e32 v2, v2 -; CHECK-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc +; CHECK-NEXT: v_not_b32_e32 v3, 63 +; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1 -; CHECK-NEXT: v_mul_f32_e32 v2, v2, v3 +; CHECK-NEXT: v_ldexp_f32 v2, v2, v3 ; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2 ; CHECK-NEXT: s_setpc_b64 s[30:31] %y = sitofp i32 %y.i to float @@ -228,9 +228,9 @@ define float @test_powr_fast_f32(float %x, float %y) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s4, 0x800000 ; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc -; CHECK-NEXT: v_mul_f32_e32 v0, v0, v3 +; CHECK-NEXT: 
v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; CHECK-NEXT: v_ldexp_f32 v0, v0, v3 ; CHECK-NEXT: v_log_f32_e32 v0, v0 ; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000 ; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc @@ -242,9 +242,9 @@ define float @test_powr_fast_f32(float %x, float %y) { ; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc ; CHECK-NEXT: v_fma_f32 v0, v1, v0, v2 ; CHECK-NEXT: v_exp_f32_e32 v0, v0 -; CHECK-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; CHECK-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 +; CHECK-NEXT: v_not_b32_e32 v1, 63 +; CHECK-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; CHECK-NEXT: v_ldexp_f32 v0, v0, v1 ; CHECK-NEXT: s_setpc_b64 s[30:31] %powr = tail call fast float @_Z4powrff(float %x, float %y) ret float %powr @@ -368,9 +368,9 @@ define float @test_pown_fast_f32(float %x, i32 %y) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s4, 0x800000 ; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc -; CHECK-NEXT: v_mul_f32_e64 v3, |v0|, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3 ; CHECK-NEXT: v_log_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_f32_i32_e32 v4, v1 ; CHECK-NEXT: v_mov_b32_e32 v2, 0x42000000 @@ -383,10 +383,10 @@ define float @test_pown_fast_f32(float %x, i32 %y) { ; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc ; CHECK-NEXT: v_fma_f32 v2, v2, v4, v3 ; CHECK-NEXT: v_exp_f32_e32 v2, v2 -; CHECK-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc +; CHECK-NEXT: v_not_b32_e32 v3, 63 +; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 31, v1 -; CHECK-NEXT: v_mul_f32_e32 v2, v2, v3 +; CHECK-NEXT: v_ldexp_f32 v2, v2, v3 ; CHECK-NEXT: v_and_or_b32 v0, v1, v0, v2 ; CHECK-NEXT: s_setpc_b64 s[30:31] %call = tail call fast float 
@_Z4pownfi(float %x, i32 %y) @@ -511,9 +511,9 @@ define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s4, 0x800000 ; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc -; CHECK-NEXT: v_mul_f32_e64 v0, |v0|, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; CHECK-NEXT: v_ldexp_f32 v0, |v0|, v3 ; CHECK-NEXT: v_lshlrev_b32_e32 v1, 1, v1 ; CHECK-NEXT: v_log_f32_e32 v0, v0 ; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1 @@ -527,9 +527,9 @@ define float @test_pown_fast_f32_known_even(float %x, i32 %y.arg) { ; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v3, vcc ; CHECK-NEXT: v_fma_f32 v0, v0, v1, v2 ; CHECK-NEXT: v_exp_f32_e32 v0, v0 -; CHECK-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; CHECK-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1 +; CHECK-NEXT: v_not_b32_e32 v1, 63 +; CHECK-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; CHECK-NEXT: v_ldexp_f32 v0, v0, v1 ; CHECK-NEXT: s_setpc_b64 s[30:31] %y = shl i32 %y.arg, 1 %call = tail call fast float @_Z4pownfi(float %x, i32 %y) @@ -651,9 +651,9 @@ define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) { ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s4, 0x800000 ; CHECK-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; CHECK-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; CHECK-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc -; CHECK-NEXT: v_mul_f32_e64 v3, |v0|, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; CHECK-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; CHECK-NEXT: v_ldexp_f32 v3, |v0|, v3 ; CHECK-NEXT: v_or_b32_e32 v1, 1, v1 ; CHECK-NEXT: v_log_f32_e32 v3, v3 ; CHECK-NEXT: v_cvt_f32_i32_e32 v1, v1 @@ -667,10 +667,10 @@ define float @test_pown_fast_f32_known_odd(float %x, i32 %y.arg) { ; CHECK-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc ; CHECK-NEXT: v_fma_f32 v1, v2, v1, v3 ; 
CHECK-NEXT: v_exp_f32_e32 v1, v1 -; CHECK-NEXT: v_mov_b32_e32 v2, 0x1f800000 -; CHECK-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc +; CHECK-NEXT: v_not_b32_e32 v2, 63 +; CHECK-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; CHECK-NEXT: s_brev_b32 s4, -2 -; CHECK-NEXT: v_mul_f32_e32 v1, v1, v2 +; CHECK-NEXT: v_ldexp_f32 v1, v1, v2 ; CHECK-NEXT: v_bfi_b32 v0, s4, v1, v0 ; CHECK-NEXT: s_setpc_b64 s[30:31] %y = or i32 %y.arg, 1 diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 9a647f04d43da..bc359d6ff3aaa 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -25054,26 +25054,26 @@ define bfloat @v_log_bf16(bfloat %a) { ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GCN-NEXT: s_mov_b32 s4, 0x800000 -; GCN-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GCN-NEXT: s_mov_b32 s5, 0x7f800000 -; GCN-NEXT: v_mov_b32_e32 v2, 0x41b17218 +; GCN-NEXT: v_mov_b32_e32 v1, 0x41b17218 ; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GCN-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GCN-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GCN-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; GCN-NEXT: v_sub_f32_e32 v3, v0, v1 -; GCN-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v1 -; GCN-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; GCN-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; GCN-NEXT: v_sub_f32_e32 v3, v0, v2 +; GCN-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; GCN-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 ; GCN-NEXT: v_mul_f32_e32 v5, 0x3f317000, v3 ; GCN-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v3 ; GCN-NEXT: v_add_f32_e32 v3, v4, v3 ; GCN-NEXT: v_add_f32_e32 v3, v5, v3 -; GCN-NEXT: v_add_f32_e32 v1, v1, v3 +; GCN-NEXT: v_add_f32_e32 v2, v2, v3 ; GCN-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s5 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; 
GCN-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GCN-NEXT: v_sub_f32_e32 v0, v0, v1 ; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -25084,10 +25084,10 @@ define bfloat @v_log_bf16(bfloat %a) { ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GFX7-NEXT: s_mov_b32 s4, 0x800000 -; GFX7-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX7-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX7-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: v_log_f32_e32 v0, v0 ; GFX7-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX7-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -25109,10 +25109,10 @@ define bfloat @v_log_bf16(bfloat %a) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: s_mov_b32 s4, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: s_mov_b32 s4, 0x7f800000 ; GFX8-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -25143,10 +25143,10 @@ define bfloat @v_log_bf16(bfloat %a) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: s_mov_b32 s4, 0x800000 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: v_log_f32_e32 v0, v0 
; GFX9-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX9-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -25174,8 +25174,9 @@ define bfloat @v_log_bf16(bfloat %a) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX10-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 @@ -25199,28 +25200,30 @@ define bfloat @v_log_bf16(bfloat %a) { ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX11-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 ; GFX11-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x41b17218, vcc_lo ; GFX11-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; 
GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_bfe_u32 v1, v0, 16, 1 ; GFX11-NEXT: v_or_b32_e32 v2, 0x400000, v0 ; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %op = call bfloat @llvm.log.bf16(bfloat %a) @@ -25233,14 +25236,14 @@ define bfloat @v_log2_bf16(bfloat %a) { ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GCN-NEXT: s_mov_b32 s4, 0x800000 -; GCN-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GCN-NEXT: v_mov_b32_e32 v2, 0x42000000 +; GCN-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GCN-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GCN-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GCN-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GCN-NEXT: v_sub_f32_e32 v0, v0, v1 ; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -25251,10 +25254,10 @@ define bfloat @v_log2_bf16(bfloat %a) { ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GFX7-NEXT: s_mov_b32 s4, 0x800000 -; GFX7-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX7-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX7-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, 
vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: v_log_f32_e32 v0, v0 ; GFX7-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -25267,10 +25270,10 @@ define bfloat @v_log2_bf16(bfloat %a) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: s_mov_b32 s4, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -25290,9 +25293,9 @@ define bfloat @v_log2_bf16(bfloat %a) { ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: s_mov_b32 s4, 0x800000 ; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX9-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -25311,9 +25314,10 @@ define bfloat @v_log2_bf16(bfloat %a) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX10-NEXT: 
v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX10-NEXT: v_bfe_u32 v1, v0, 16, 1 @@ -25330,20 +25334,21 @@ define bfloat @v_log2_bf16(bfloat %a) { ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_bfe_u32 v1, v0, 16, 1 ; GFX11-NEXT: v_or_b32_e32 v2, 0x400000, v0 ; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add3_u32 v1, v1, v0, 0x7fff +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %op = call bfloat @llvm.log2.bf16(bfloat %a) @@ -25356,26 +25361,26 @@ define bfloat @v_log10_bf16(bfloat %a) { ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GCN-NEXT: s_mov_b32 s4, 0x800000 -; GCN-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GCN-NEXT: s_mov_b32 s5, 0x7f800000 -; GCN-NEXT: v_mov_b32_e32 v2, 0x411a209b +; GCN-NEXT: v_mov_b32_e32 v1, 0x411a209b ; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GCN-NEXT: 
v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GCN-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GCN-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GCN-NEXT: v_log_f32_e32 v0, v0 -; GCN-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 -; GCN-NEXT: v_sub_f32_e32 v3, v0, v1 -; GCN-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v1 -; GCN-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; GCN-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 +; GCN-NEXT: v_sub_f32_e32 v3, v0, v2 +; GCN-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; GCN-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 ; GCN-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v3 ; GCN-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v3 ; GCN-NEXT: v_add_f32_e32 v3, v4, v3 ; GCN-NEXT: v_add_f32_e32 v3, v5, v3 -; GCN-NEXT: v_add_f32_e32 v1, v1, v3 +; GCN-NEXT: v_add_f32_e32 v2, v2, v3 ; GCN-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s5 -; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] -; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GCN-NEXT: v_sub_f32_e32 v0, v0, v1 ; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -25386,10 +25391,10 @@ define bfloat @v_log10_bf16(bfloat %a) { ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GFX7-NEXT: s_mov_b32 s4, 0x800000 -; GFX7-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX7-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX7-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: v_log_f32_e32 v0, v0 ; GFX7-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX7-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -25411,10 +25416,10 @@ define bfloat @v_log10_bf16(bfloat %a) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; 
GFX8-NEXT: s_mov_b32 s4, 0x800000 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX8-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: s_mov_b32 s4, 0x7f800000 ; GFX8-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -25445,10 +25450,10 @@ define bfloat @v_log10_bf16(bfloat %a) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX9-NEXT: s_mov_b32 s4, 0x800000 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX9-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX9-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -25476,8 +25481,9 @@ define bfloat @v_log10_bf16(bfloat %a) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: v_log_f32_e32 v0, v0 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX10-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 @@ -25501,28 +25507,30 @@ define bfloat @v_log10_bf16(bfloat %a) { ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 -; 
GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: v_log_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX11-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 ; GFX11-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_add_f32_e32 v1, v1, v2 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x411a209b, vcc_lo ; GFX11-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_sub_f32_e32 v0, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_bfe_u32 v1, v0, 16, 1 ; GFX11-NEXT: v_or_b32_e32 v2, 0x400000, v0 ; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add3_u32 v1, v1, v0, 0x7fff -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %op = call bfloat @llvm.log10.bf16(bfloat %a) @@ -25719,14 +25727,14 @@ define bfloat @v_exp2_bf16(bfloat %a) { ; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GCN-NEXT: s_mov_b32 s4, 0xc2fc0000 ; GCN-NEXT: v_mov_b32_e32 v1, 0x42800000 -; 
GCN-NEXT: v_mov_b32_e32 v2, 0x1f800000 +; GCN-NEXT: v_not_b32_e32 v2, 63 ; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GCN-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GCN-NEXT: v_add_f32_e32 v0, v0, v1 ; GCN-NEXT: v_exp_f32_e32 v0, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GCN-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GCN-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] ; @@ -25741,9 +25749,9 @@ define bfloat @v_exp2_bf16(bfloat %a) { ; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX7-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX7-NEXT: v_exp_f32_e32 v0, v0 -; GFX7-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX7-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_not_b32_e32 v1, 63 +; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -25757,9 +25765,9 @@ define bfloat @v_exp2_bf16(bfloat %a) { ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX8-NEXT: v_add_f32_e32 v0, v0, v1 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX8-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX8-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX8-NEXT: v_not_b32_e32 v1, 63 +; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX8-NEXT: v_bfe_u32 v1, v0, 16, 1 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v0 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x7fff, v1 @@ -25779,10 +25787,10 @@ define bfloat @v_exp2_bf16(bfloat %a) { ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GFX9-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc +; GFX9-NEXT: v_not_b32_e32 v1, 63 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX9-NEXT: s_movk_i32 s4, 0x7fff -; 
GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: v_bfe_u32 v1, v0, 16, 1 ; GFX9-NEXT: v_add3_u32 v1, v1, v0, s4 ; GFX9-NEXT: v_or_b32_e32 v2, 0x400000, v0 @@ -25797,10 +25805,10 @@ define bfloat @v_exp2_bf16(bfloat %a) { ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX10-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo -; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX10-NEXT: v_exp_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX10-NEXT: v_bfe_u32 v1, v0, 16, 1 ; GFX10-NEXT: v_or_b32_e32 v2, 0x400000, v0 ; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 @@ -25816,12 +25824,12 @@ define bfloat @v_exp2_bf16(bfloat %a) { ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0xc2fc0000, v0 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 0x42800000, vcc_lo -; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x1f800000, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 0xffffffc0, vcc_lo ; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_exp_f32_e32 v0, v0 ; GFX11-NEXT: s_waitcnt_depctr 0xfff -; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX11-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX11-NEXT: v_bfe_u32 v1, v0, 16, 1 ; GFX11-NEXT: v_or_b32_e32 v2, 0x400000, v0 ; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 diff --git a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll index b2e4117096ce5..fdc9704a3784e 100644 --- a/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll +++ b/llvm/test/CodeGen/AMDGPU/copysign-simplify-demanded-bits.ll @@ -337,9 +337,9 @@ define float @test_copysign_pow_fast_f32__integral_y(float %x, i32 
%y.i) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_mov_b32 s4, 0x800000 ; GFX9-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x4f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc -; GFX9-NEXT: v_mul_f32_e64 v3, |v0|, v3 +; GFX9-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; GFX9-NEXT: v_ldexp_f32 v3, |v0|, v3 ; GFX9-NEXT: v_log_f32_e32 v3, v3 ; GFX9-NEXT: v_cvt_f32_i32_e32 v1, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x42000000 @@ -353,10 +353,10 @@ define float @test_copysign_pow_fast_f32__integral_y(float %x, i32 %y.i) { ; GFX9-NEXT: v_fma_f32 v2, v2, v1, v3 ; GFX9-NEXT: v_cvt_i32_f32_e32 v1, v1 ; GFX9-NEXT: v_exp_f32_e32 v2, v2 -; GFX9-NEXT: v_mov_b32_e32 v3, 0x1f800000 -; GFX9-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc +; GFX9-NEXT: v_not_b32_e32 v3, 63 +; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 31, v1 -; GFX9-NEXT: v_mul_f32_e32 v2, v2, v3 +; GFX9-NEXT: v_ldexp_f32 v2, v2, v3 ; GFX9-NEXT: v_and_b32_e32 v0, v1, v0 ; GFX9-NEXT: s_brev_b32 s4, -2 ; GFX9-NEXT: v_bfi_b32 v0, s4, v2, v0 diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll index cce0fb7e003c5..5b72795ba07ea 100644 --- a/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll +++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fmul-sel.ll @@ -270,40 +270,36 @@ define float @fmul_select_f32_test7_sel_log2val_pos59_pos92(float %x, i32 %bool. 
; GFX7-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v3, 0x6d800000 -; GFX7-NEXT: v_mov_b32_e32 v4, 0x5d000000 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x5c ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 59, vcc +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x6d800000 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x5d000000 +; GFX9-NEXT: v_mov_b32_e32 v3, 0x5c ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, 59, vcc +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x5d000000 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x6d800000, v3, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0x5c, 59, vcc_lo +; GFX1030-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f32_test7_sel_log2val_pos59_pos92: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0x5d000000 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x6d800000, v3, vcc_lo -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0x5c, 59, vcc_lo +; GFX1100-NEXT: s_delay_alu 
instid0(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, float 0x43A0000000000000, float 0x45B0000000000000 @@ -440,40 +436,36 @@ define float @fmul_select_f32_test11_sel_log2val_pos78_pos56(float %x, i32 %bool ; GFX7-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v3, 0xdb800000 -; GFX7-NEXT: v_mov_b32_e32 v4, 0xe6800000 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x4e ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_cndmask_b32_e32 v1, 56, v3, vcc +; GFX7-NEXT: v_ldexp_f32_e64 v0, -v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0xdb800000 -; GFX9-NEXT: v_mov_b32_e32 v4, 0xe6800000 +; GFX9-NEXT: v_mov_b32_e32 v3, 0x4e ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, 56, v3, vcc +; GFX9-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0xe6800000 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xdb800000, v3, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 56, 0x4e, vcc_lo +; GFX1030-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f32_test11_sel_log2val_pos78_pos56: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0xe6800000 ; 
GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xdb800000, v3, vcc_lo -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 56, 0x4e, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, float 0xC4D0000000000000, float 0xC370000000000000 @@ -485,40 +477,40 @@ define float @fmul_select_f32_test12_sel_log2val_neg48_pos68(float %x, i32 %bool ; GFX7-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v3, 0x61800000 -; GFX7-NEXT: v_mov_b32_e32 v4, 0x27800000 +; GFX7-NEXT: v_mov_b32_e32 v3, 0x44 +; GFX7-NEXT: v_not_b32_e32 v4, 47 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x61800000 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x27800000 +; GFX9-NEXT: v_mov_b32_e32 v3, 0x44 +; GFX9-NEXT: v_not_b32_e32 v4, 47 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x27800000 +; GFX1030-NEXT: v_not_b32_e32 v3, 47 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x61800000, v3, vcc_lo -; GFX1030-NEXT: v_mul_f32_e32 v0, v0, 
v1 +; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x44, v3, vcc_lo +; GFX1030-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f32_test12_sel_log2val_neg48_pos68: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0x27800000 +; GFX1100-NEXT: v_not_b32_e32 v3, 47 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x61800000, v3, vcc_lo -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x44, v3, vcc_lo +; GFX1100-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, float 0x3CF0000000000000, float 0x4430000000000000 @@ -530,40 +522,34 @@ define double @fmul_select_f64_test1(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX7-LABEL: fmul_select_f64_test1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v4, 0x3ff00000 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc -; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f64_test1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v4, 0x3ff00000 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, 2.0, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f64_test1: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1030-NEXT: v_cmp_eq_u32_e32 
vcc_lo, v2, v3 -; GFX1030-NEXT: v_mov_b32_e32 v4, 0 -; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f64_test1: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: v_mov_b32_e32 v4, 0 -; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0x3ff00000, 2.0, vcc_lo +; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double 2.000000e+00, double 1.000000e+00 @@ -575,43 +561,34 @@ define double @fmul_select_f64_test2(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX7-LABEL: fmul_select_f64_test2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v4, 0x3ff00000 -; GFX7-NEXT: v_mov_b32_e32 v5, 0x3fe00000 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc -; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f64_test2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v4, 0x3ff00000 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x3fe00000 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX1030-LABEL: fmul_select_f64_test2: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3fe00000 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_mov_b32_e32 v4, 0 -; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3ff00000, v5, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo +; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f64_test2: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v5, 0x3fe00000 :: v_dual_mov_b32 v4, 0 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0x3ff00000, v5, vcc_lo -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double 5.000000e-01, double 1.000000e+00 @@ -623,57 +600,46 @@ define <2 x double> @fmul_select_v2f64_test3(<2 x double> %x, <2 x i32> %bool.ar ; GFX7-LABEL: fmul_select_v2f64_test3: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v11, 0x3ff00000 -; GFX7-NEXT: v_mov_b32_e32 v8, 0 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 -; GFX7-NEXT: v_cndmask_b32_e64 v10, v11, 2.0, vcc -; GFX7-NEXT: v_mov_b32_e32 v9, v8 +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] -; GFX7-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc -; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX7-NEXT: v_ldexp_f64 
v[2:3], v[2:3], v4 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_v2f64_test3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000 -; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 -; GFX9-NEXT: v_cndmask_b32_e64 v10, v11, 2.0, vcc -; GFX9-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] -; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc -; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_v2f64_test3: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1030-NEXT: v_mov_b32_e32 v8, 0 -; GFX1030-NEXT: v_cndmask_b32_e64 v11, 0x3ff00000, 2.0, vcc_lo +; GFX1030-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1030-NEXT: v_mov_b32_e32 v10, v8 -; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] -; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo +; GFX1030-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_v2f64_test3: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1100-NEXT: v_mov_b32_e32 v8, 0 -; GFX1100-NEXT: v_cndmask_b32_e64 v11, 0x3ff00000, 2.0, vcc_lo +; GFX1100-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; 
GFX1100-NEXT: v_mov_b32_e32 v10, v8 -; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo +; GFX1100-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x double> , <2 x double> @@ -685,59 +651,46 @@ define <2 x double> @fmul_select_v2f64_test4(<2 x double> %x, <2 x i32> %bool.ar ; GFX7-LABEL: fmul_select_v2f64_test4: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v11, 0x3ff00000 -; GFX7-NEXT: v_mov_b32_e32 v12, 0x3fe00000 -; GFX7-NEXT: v_mov_b32_e32 v8, 0 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 -; GFX7-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc -; GFX7-NEXT: v_mov_b32_e32 v9, v8 +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] -; GFX7-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc -; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; GFX7-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_v2f64_test4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000 -; GFX9-NEXT: v_mov_b32_e32 v12, 0x3fe00000 -; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 -; GFX9-NEXT: v_cndmask_b32_e32 v10, v11, v12, vcc -; GFX9-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] -; 
GFX9-NEXT: v_cndmask_b32_e32 v9, v11, v12, vcc -; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_v2f64_test4: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v9, 0x3fe00000 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1030-NEXT: v_mov_b32_e32 v8, 0 -; GFX1030-NEXT: v_cndmask_b32_e32 v11, 0x3ff00000, v9, vcc_lo +; GFX1030-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1030-NEXT: v_mov_b32_e32 v10, v8 -; GFX1030-NEXT: v_cndmask_b32_e32 v9, 0x3ff00000, v9, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] -; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo +; GFX1030-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_v2f64_test4: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v9, 0x3fe00000 :: v_dual_mov_b32 v8, 0 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-NEXT: v_dual_mov_b32 v10, v8 :: v_dual_cndmask_b32 v11, 0x3ff00000, v9 +; GFX1100-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc_lo ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] -; GFX1100-NEXT: v_cndmask_b32_e32 v9, 0x3ff00000, v9, vcc_lo -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v4 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc_lo +; 
GFX1100-NEXT: v_ldexp_f64 v[2:3], v[2:3], v5 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x double> , <2 x double> @@ -749,43 +702,34 @@ define double @fmul_select_f64_test5(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX7-LABEL: fmul_select_f64_test5: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000 -; GFX7-NEXT: v_mov_b32_e32 v5, 0xbfe00000 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc -; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GFX7-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f64_test5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000 -; GFX9-NEXT: v_mov_b32_e32 v5, 0xbfe00000 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f64_test5: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v5, 0xbfe00000 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_mov_b32_e32 v4, 0 -; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo +; GFX1030-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f64_test5: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v5, 0xbfe00000 :: v_dual_mov_b32 v4, 0 ; GFX1100-NEXT: v_cmp_eq_u32_e32 
vcc_lo, v2, v3 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0xbff00000, v5, vcc_lo -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double -5.000000e-01, double -1.000000e+00 @@ -797,40 +741,34 @@ define double @fmul_select_f64_test6(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX7-LABEL: fmul_select_f64_test6: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v4, 0xbff00000 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc -; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f64_test6: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v4, 0xbff00000 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e64 v3, v4, -2.0, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f64_test6: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_mov_b32_e32 v4, 0 -; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, -2.0, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX1030-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: 
fmul_select_f64_test6: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: v_mov_b32_e32 v4, 0 -; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0xbff00000, -2.0, vcc_lo +; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double -2.000000e+00, double -1.000000e+00 @@ -887,43 +825,34 @@ define double @fmul_select_f64_test8(double %x, i32 %bool.arg1, i32 %bool.arg2) ; GFX7-LABEL: fmul_select_f64_test8: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v4, 0xc0400000 -; GFX7-NEXT: v_mov_b32_e32 v5, 0xc0100000 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc -; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc +; GFX7-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f64_test8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v4, 0xc0400000 -; GFX9-NEXT: v_mov_b32_e32 v5, 0xc0100000 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc +; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f64_test8: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v5, 0xc0100000 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_mov_b32_e32 v4, 0 -; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0xc0400000, v5, vcc_lo -; 
GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc_lo +; GFX1030-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f64_test8: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v5, 0xc0100000 :: v_dual_mov_b32 v4, 0 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0xc0400000, v5, vcc_lo -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: v_cndmask_b32_e64 v2, 5, 2, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v2 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double -4.000000e+00, double -3.200000e+01 @@ -935,57 +864,46 @@ define <2 x double> @fmul_select_v2f64_test9(<2 x double> %x, <2 x i32> %bool.ar ; GFX7-LABEL: fmul_select_v2f64_test9: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v11, 0xbff00000 -; GFX7-NEXT: v_mov_b32_e32 v8, 0 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 -; GFX7-NEXT: v_cndmask_b32_e64 v10, v11, -2.0, vcc -; GFX7-NEXT: v_mov_b32_e32 v9, v8 +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] -; GFX7-NEXT: v_cndmask_b32_e64 v9, v11, -2.0, vcc -; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX7-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4 +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX7-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v4 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_v2f64_test9: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v11, 0xbff00000 -; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 -; GFX9-NEXT: 
v_cndmask_b32_e64 v10, v11, -2.0, vcc -; GFX9-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] -; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, -2.0, vcc -; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX9-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_v2f64_test9: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1030-NEXT: v_mov_b32_e32 v8, 0 -; GFX1030-NEXT: v_cndmask_b32_e64 v11, 0xbff00000, -2.0, vcc_lo +; GFX1030-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1030-NEXT: v_mov_b32_e32 v10, v8 -; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0xbff00000, -2.0, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] -; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1030-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4 +; GFX1030-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo +; GFX1030-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v5 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_v2f64_test9: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1100-NEXT: v_mov_b32_e32 v8, 0 -; GFX1100-NEXT: v_cndmask_b32_e64 v11, 0xbff00000, -2.0, vcc_lo +; GFX1100-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-NEXT: v_mov_b32_e32 v10, v8 -; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0xbff00000, -2.0, vcc_lo -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1100-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f64 v[0:1], -v[0:1], v4 +; GFX1100-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc_lo +; GFX1100-NEXT: v_ldexp_f64 v[2:3], -v[2:3], v5 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x double> , <2 x double> @@ -997,61 +915,56 @@ define <2 x double> @fmul_select_v2f64_test10(<2 x double> %x, <2 x i32> %bool.a ; GFX7-LABEL: fmul_select_v2f64_test10: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v8, 0 -; GFX7-NEXT: v_mov_b32_e32 v9, 0xbff00000 -; GFX7-NEXT: v_mov_b32_e32 v10, 0x3fe00000 +; GFX7-NEXT: v_mov_b32_e32 v8, 0xbff00000 +; GFX7-NEXT: v_mov_b32_e32 v9, 0x3fe00000 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 -; GFX7-NEXT: v_mov_b32_e32 v11, 0x3ff00000 -; GFX7-NEXT: v_cndmask_b32_e32 v10, v9, v10, vcc -; GFX7-NEXT: v_mov_b32_e32 v9, v8 +; GFX7-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[9:10] -; GFX7-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc -; GFX7-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX7-NEXT: v_mov_b32_e32 v8, 0 +; GFX7-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX7-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_v2f64_test10: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v8, 0 -; GFX9-NEXT: v_mov_b32_e32 v9, 0xbff00000 -; GFX9-NEXT: v_mov_b32_e32 v10, 0x3fe00000 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xbff00000 +; GFX9-NEXT: v_mov_b32_e32 v9, 0x3fe00000 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v4, v6 -; GFX9-NEXT: v_mov_b32_e32 v11, 0x3ff00000 -; GFX9-NEXT: v_cndmask_b32_e32 v10, v9, v10, vcc -; GFX9-NEXT: v_mov_b32_e32 v9, v8 +; GFX9-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v5, v7 -; GFX9-NEXT: v_mul_f64 v[0:1], 
v[0:1], v[9:10] -; GFX9-NEXT: v_cndmask_b32_e64 v9, v11, 2.0, vcc -; GFX9-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX9-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_v2f64_test10: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v9, 0x3fe00000 +; GFX1030-NEXT: v_mov_b32_e32 v8, 0x3fe00000 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 -; GFX1030-NEXT: v_mov_b32_e32 v8, 0 -; GFX1030-NEXT: v_cndmask_b32_e32 v11, 0xbff00000, v9, vcc_lo +; GFX1030-NEXT: v_cndmask_b32_e32 v9, 0xbff00000, v8, vcc_lo ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1030-NEXT: v_mov_b32_e32 v10, v8 -; GFX1030-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] -; GFX1030-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1030-NEXT: v_mov_b32_e32 v8, 0 +; GFX1030-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc_lo +; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX1030-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_v2f64_test10: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v9, 0x3fe00000 :: v_dual_mov_b32 v8, 0 +; GFX1100-NEXT: v_mov_b32_e32 v8, 0x3fe00000 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-NEXT: v_dual_mov_b32 v10, v8 :: v_dual_cndmask_b32 v11, 0xbff00000, v9 +; GFX1100-NEXT: v_dual_cndmask_b32 v9, 0xbff00000, v8 :: v_dual_mov_b32 v8, 0 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[10:11] -; GFX1100-NEXT: v_cndmask_b32_e64 v9, 0x3ff00000, 2.0, vcc_lo +; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[8:9] +; GFX1100-NEXT: 
v_cndmask_b32_e64 v4, 0, 1, vcc_lo ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1100-NEXT: v_mul_f64 v[2:3], v[2:3], v[8:9] +; GFX1100-NEXT: v_ldexp_f64 v[2:3], v[2:3], v4 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq <2 x i32> %bool.arg1, %bool.arg2 %y = select <2 x i1> %bool, <2 x double> , <2 x double> @@ -1199,43 +1112,40 @@ define double @fmul_select_f64_test14_sel_log2val_pos92_neg27(double %x, i32 %bo ; GFX7-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v4, 0x3e400000 -; GFX7-NEXT: v_mov_b32_e32 v5, 0x45b00000 +; GFX7-NEXT: v_not_b32_e32 v4, 26 +; GFX7-NEXT: v_mov_b32_e32 v5, 0x5c ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc -; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc +; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v4, 0x3e400000 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x45b00000 +; GFX9-NEXT: v_not_b32_e32 v4, 26 +; GFX9-NEXT: v_mov_b32_e32 v5, 0x5c ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v5, 0x45b00000 +; GFX1030-NEXT: v_mov_b32_e32 v4, 0x5c ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_mov_b32_e32 v4, 0 -; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3e400000, v5, vcc_lo -; GFX1030-NEXT: 
v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0xffffffe5, v4, vcc_lo +; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f64_test14_sel_log2val_pos92_neg27: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v5, 0x45b00000 :: v_dual_mov_b32 v4, 0 +; GFX1100-NEXT: v_mov_b32_e32 v4, 0x5c ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0x3e400000, v5, vcc_lo -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0xffffffe5, v4, vcc_lo +; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double 0x45B0000000000000, double 0x3E40000000000000 @@ -1247,43 +1157,40 @@ define double @fmul_select_f64_test15_sel_log2val_neg42_neg33(double %x, i32 %bo ; GFX7-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v4, 0x3de00000 -; GFX7-NEXT: v_mov_b32_e32 v5, 0x3d500000 +; GFX7-NEXT: v_not_b32_e32 v4, 32 +; GFX7-NEXT: v_not_b32_e32 v5, 41 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc -; GFX7-NEXT: v_mov_b32_e32 v2, 0 -; GFX7-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX7-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc +; GFX7-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v4, 0x3de00000 -; GFX9-NEXT: v_mov_b32_e32 v5, 0x3d500000 +; GFX9-NEXT: v_not_b32_e32 v4, 32 +; GFX9-NEXT: v_not_b32_e32 v5, 41 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3 -; GFX9-NEXT: 
v_cndmask_b32_e32 v3, v4, v5, vcc -; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] +; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc +; GFX9-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v5, 0x3d500000 +; GFX1030-NEXT: v_not_b32_e32 v4, 41 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 -; GFX1030-NEXT: v_mov_b32_e32 v4, 0 -; GFX1030-NEXT: v_cndmask_b32_e32 v5, 0x3de00000, v5, vcc_lo -; GFX1030-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1030-NEXT: v_cndmask_b32_e32 v2, 0xffffffdf, v4, vcc_lo +; GFX1030-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f64_test15_sel_log2val_neg42_neg33: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_dual_mov_b32 v5, 0x3d500000 :: v_dual_mov_b32 v4, 0 +; GFX1100-NEXT: v_not_b32_e32 v4, 41 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v3 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v5, 0x3de00000, v5, vcc_lo -; GFX1100-NEXT: v_mul_f64 v[0:1], v[0:1], v[4:5] +; GFX1100-NEXT: v_cndmask_b32_e32 v2, 0xffffffdf, v4, vcc_lo +; GFX1100-NEXT: v_ldexp_f64 v[0:1], v[0:1], v2 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, double 0x3D50000000000000, double 0x3DE0000000000000 @@ -1298,38 +1205,34 @@ define half @fmul_select_f16_test1(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 2.0, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: 
v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f16_test1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f16_test1: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo -; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f16_test1: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo -; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half 2.000000e+00, half 1.000000e+00 @@ -1343,38 +1246,41 @@ define half @fmul_select_f16_test2(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, 1.0, 0.5, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; 
GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f16_test2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x3c00 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x3800 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; GFX9-NEXT: s_movk_i32 s4, 0x8000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX9-NEXT: v_med3_i32 v1, v1, s4, v2 +; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f16_test2: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x3800 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo -; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_movk_i32 s4, 0x8000 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX1030-NEXT: v_med3_i32 v1, v1, s4, 0x7fff +; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f16_test2: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0x3800 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo -; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_movk_i32 s0, 0x8000 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_med3_i32 v1, v1, s0, 0x7fff +; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half 5.000000e-01, half 1.000000e+00 
@@ -1507,40 +1413,35 @@ define half @fmul_select_f16_test5(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0x41000000 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX7-NEXT: v_cndmask_b32_e64 v1, v3, 2.0, vcc +; GFX7-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f16_test5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x4800 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x4000 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc +; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f16_test5: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x4000 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo -; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo +; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f16_test5: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0x4000 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x4800, v3, vcc_lo -; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 3, 1, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] 
%bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half 2.000000e+00, half 8.000000e+00 @@ -1688,42 +1589,36 @@ define half @fmul_select_f16_test9(half %x, i32 %bool.arg1, i32 %bool.arg2) { ; GFX7-LABEL: fmul_select_f16_test9: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0xc2000000 -; GFX7-NEXT: v_mov_b32_e32 v4, 0xc1800000 +; GFX7-NEXT: v_cvt_f16_f32_e64 v0, -v0 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f16_test9: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0xd000 -; GFX9-NEXT: v_mov_b32_e32 v4, 0xcc00 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc +; GFX9-NEXT: v_ldexp_f16_e64 v0, -v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f16_test9: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0xcc00 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo -; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo +; GFX1030-NEXT: v_ldexp_f16_e64 v0, -v0, v1 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f16_test9: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0xcc00 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0xd000, v3, vcc_lo 
-; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 5, 4, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f16_e64 v0, -v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half -1.600000e+01, half -3.200000e+01 @@ -1736,41 +1631,42 @@ define half @fmul_select_f16_test10_sel_log2val_neg11_pos11(half %x, i32 %bool.a ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0x45000000 -; GFX7-NEXT: v_mov_b32_e32 v4, 0x3a000000 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x6800 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x1000 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc +; GFX9-NEXT: s_movk_i32 s4, 0x8000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX9-NEXT: v_med3_i32 v1, v1, s4, v2 +; GFX9-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x1000 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x6800, v3, vcc_lo -; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_movk_i32 s4, 0x8000 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo +; GFX1030-NEXT: v_med3_i32 v1, v1, s4, 0x7fff +; GFX1030-NEXT: 
v_ldexp_f16_e32 v0, v0, v1 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f16_test10_sel_log2val_neg11_pos11: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0x1000 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x6800, v3, vcc_lo -; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_movk_i32 s0, 0x8000 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 11, -11, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_med3_i32 v1, v1, s0, 0x7fff +; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half 0xH1000, half 0xH6800 @@ -1783,41 +1679,42 @@ define half @fmul_select_f16_test11_sel_log2val_pos7_neg14(half %x, i32 %bool.ar ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_mov_b32_e32 v3, 0x38800000 -; GFX7-NEXT: v_mov_b32_e32 v4, 0x43000000 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 +; GFX7-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX7-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v3, 0x400 -; GFX9-NEXT: v_mov_b32_e32 v4, 0x5800 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc -; GFX9-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc +; GFX9-NEXT: s_movk_i32 s4, 0x8000 +; GFX9-NEXT: v_mov_b32_e32 v2, 0x7fff +; GFX9-NEXT: v_med3_i32 v1, v1, s4, v2 +; GFX9-NEXT: 
v_ldexp_f16_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX1030-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1030-NEXT: v_mov_b32_e32 v3, 0x5800 ; GFX1030-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1030-NEXT: v_cndmask_b32_e32 v1, 0x400, v3, vcc_lo -; GFX1030-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1030-NEXT: s_movk_i32 s4, 0x8000 +; GFX1030-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo +; GFX1030-NEXT: v_med3_i32 v1, v1, s4, 0x7fff +; GFX1030-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX1030-NEXT: s_setpc_b64 s[30:31] ; ; GFX1100-LABEL: fmul_select_f16_test11_sel_log2val_pos7_neg14: ; GFX1100: ; %bb.0: ; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100-NEXT: v_mov_b32_e32 v3, 0x5800 ; GFX1100-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-NEXT: v_cndmask_b32_e32 v1, 0x400, v3, vcc_lo -; GFX1100-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX1100-NEXT: s_movk_i32 s0, 0x8000 +; GFX1100-NEXT: v_cndmask_b32_e64 v1, -14, 7, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_med3_i32 v1, v1, s0, 0x7fff +; GFX1100-NEXT: v_ldexp_f16_e32 v0, v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] %bool = icmp eq i32 %bool.arg1, %bool.arg2 %y = select i1 %bool, half 0xH5800, half 0xH0400 diff --git a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll index 2140f50611d71..ebfb5e9ccaa35 100644 --- a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll @@ -1613,14 +1613,12 @@ define float @v_recip_sqrt_f32_ulp25_contract(float %x) { ; CODEGEN-IEEE-SDAG: ; %bb.0: ; CODEGEN-IEEE-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CODEGEN-IEEE-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; CODEGEN-IEEE-SDAG-NEXT: v_mov_b32_e32 v1, 0x4b800000 ; CODEGEN-IEEE-SDAG-NEXT: v_cmp_gt_f32_e32 
vcc, s4, v0 -; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; CODEGEN-IEEE-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 24, vcc +; CODEGEN-IEEE-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; CODEGEN-IEEE-SDAG-NEXT: v_rsq_f32_e32 v0, v0 -; CODEGEN-IEEE-SDAG-NEXT: v_mov_b32_e32 v1, 0x45800000 -; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; CODEGEN-IEEE-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; CODEGEN-IEEE-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc +; CODEGEN-IEEE-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; CODEGEN-IEEE-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; CODEGEN-IEEE-GISEL-LABEL: v_recip_sqrt_f32_ulp25_contract: diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll index 711a5fff1a063..104e157e9e15a 100644 --- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll +++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll @@ -3307,22 +3307,54 @@ define amdgpu_ps i32 @s_mul_32_f16(half inreg %x, half inreg %y) { ; -------------------------------------------------------------------- define float @v_mul_f32_select_64_1(i32 %arg, float %x) { -; GFX9-LABEL: v_mul_f32_select_64_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_f32_select_64_1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1011-LABEL: v_mul_f32_select_64_1: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1011-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo -; 
GFX1011-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX1011-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_f32_select_64_1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc +; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_mul_f32_select_64_1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_f32_select_64_1: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo +; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_f32_select_64_1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_f32_select_64_1: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo +; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, float 64.0, float 1.0 %mul = fmul float %x, %select.pow2 @@ -3330,22 +3362,54 @@ define float @v_mul_f32_select_64_1(i32 %arg, float %x) { } define float 
@v_mul_f32_select_1_64(i32 %arg, float %x) { -; GFX9-LABEL: v_mul_f32_select_1_64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, 1.0, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_f32_select_1_64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1011-LABEL: v_mul_f32_select_1_64: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo -; GFX1011-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX1011-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_f32_select_1_64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 1.0, vcc +; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_mul_f32_select_1_64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_f32_select_1_64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo +; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: s_setpc_b64 
s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_f32_select_1_64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_f32_select_1_64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0x42800000, 1.0, vcc_lo +; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, float 1.0, float 64.0 %mul = fmul float %x, %select.pow2 @@ -3353,22 +3417,54 @@ define float @v_mul_f32_select_1_64(i32 %arg, float %x) { } define float @v_mul_f32_select_n1_n64(i32 %arg, float %x) { -; GFX9-LABEL: v_mul_f32_select_n1_n64: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v2, 0xc2800000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, -1.0, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_f32_select_n1_n64: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1011-LABEL: v_mul_f32_select_n1_n64: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo -; GFX1011-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX1011-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_f32_select_n1_n64: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2800000 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1.0, vcc +; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_mul_f32_select_n1_n64: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_f32_select_n1_n64: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo +; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_f32_select_n1_n64: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_f32_select_n1_n64: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0xc2800000, -1.0, vcc_lo +; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, float -1.0, float -64.0 %mul = fmul float %x, %select.pow2 @@ -3376,22 +3472,54 @@ define float @v_mul_f32_select_n1_n64(i32 %arg, float %x) { } define float @v_mul_f32_select_n64_n1(i32 %arg, float %x) { -; GFX9-LABEL: v_mul_f32_select_n64_n1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v2, 
0xc2800000 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, -1.0, v2, vcc -; GFX9-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-SDAG-LABEL: v_mul_f32_select_n64_n1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX1011-LABEL: v_mul_f32_select_n64_n1: -; GFX1011: ; %bb.0: -; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1011-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo -; GFX1011-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX1011-NEXT: s_setpc_b64 s[30:31] +; GFX9-GISEL-LABEL: v_mul_f32_select_n64_n1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc2800000 +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, -1.0, v2, vcc +; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_mul_f32_select_n64_n1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_mul_f32_select_n64_n1: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo +; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_mul_f32_select_n64_n1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_mul_f32_select_n64_n1: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, -1.0, 0xc2800000, vcc_lo +; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cond = icmp eq i32 %arg, 0 %select.pow2 = select i1 %cond, float -64.0, float -1.0 %mul = fmul float %x, %select.pow2 @@ -3402,11 +3530,9 @@ define float @v_mul_f32_select_128_64(i32 %arg, float %x) { ; GFX9-SDAG-LABEL: v_mul_f32_select_128_64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x43000000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc +; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f32_select_128_64: @@ -3422,10 +3548,9 @@ define float @v_mul_f32_select_128_64(i32 %arg, float %x) { ; GFX10-SDAG-LABEL: v_mul_f32_select_128_64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0x43000000 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x42800000, v2, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f32_select_128_64: @@ -3440,10 +3565,9 @@ define float @v_mul_f32_select_128_64(i32 %arg, float %x) { ; GFX11-SDAG-LABEL: v_mul_f32_select_128_64: ; 
GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0x43000000 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x42800000, v2, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f32 v0, v1, v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f32_select_128_64: @@ -3464,11 +3588,9 @@ define float @v_mul_f32_select_n128_n64(i32 %arg, float %x) { ; GFX9-SDAG-LABEL: v_mul_f32_select_n128_n64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0xc2800000 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc3000000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc +; GFX9-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f32_select_n128_n64: @@ -3484,10 +3606,9 @@ define float @v_mul_f32_select_n128_n64(i32 %arg, float %x) { ; GFX10-SDAG-LABEL: v_mul_f32_select_n128_n64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0xc3000000 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xc2800000, v2, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f32_select_n128_n64: @@ -3502,10 +3623,9 @@ define float @v_mul_f32_select_n128_n64(i32 %arg, float %x) { ; GFX11-SDAG-LABEL: v_mul_f32_select_n128_n64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0xc3000000 ; 
GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xc2800000, v2, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f32_select_n128_n64: @@ -3526,11 +3646,9 @@ define float @v_mul_f32_select_n128_n16(i32 %arg, float %x) { ; GFX9-SDAG-LABEL: v_mul_f32_select_n128_n16: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0xc1800000 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc3000000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX9-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc +; GFX9-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f32_select_n128_n16: @@ -3546,10 +3664,9 @@ define float @v_mul_f32_select_n128_n16(i32 %arg, float %x) { ; GFX10-SDAG-LABEL: v_mul_f32_select_n128_n16: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0xc3000000 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xc1800000, v2, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f32_select_n128_n16: @@ -3564,10 +3681,9 @@ define float @v_mul_f32_select_n128_n16(i32 %arg, float %x) { ; GFX11-SDAG-LABEL: v_mul_f32_select_n128_n16: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0xc3000000 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xc1800000, v2, vcc_lo -; GFX11-SDAG-NEXT: 
v_mul_f32_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f32 v0, -v1, v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f32_select_n128_n16: @@ -3818,12 +3934,9 @@ define double @v_mul_f64_select_64_1(i32 %arg, double %x) { ; GFX9-SDAG-LABEL: v_mul_f64_select_64_1: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x3ff00000 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x40500000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f64_select_64_1: @@ -3840,11 +3953,9 @@ define double @v_mul_f64_select_64_1(i32 %arg, double %x) { ; GFX10-SDAG-LABEL: v_mul_f64_select_64_1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x40500000 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v4, 0x3ff00000, v4, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f64_select_64_1: @@ -3860,10 +3971,9 @@ define double @v_mul_f64_select_64_1(i32 %arg, double %x) { ; GFX11-SDAG-LABEL: v_mul_f64_select_64_1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0x40500000 :: v_dual_mov_b32 v3, 0 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v4, 0x3ff00000, v4, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; 
GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f64_select_64_1: @@ -3884,12 +3994,9 @@ define double @v_mul_f64_select_1_64(i32 %arg, double %x) { ; GFX9-SDAG-LABEL: v_mul_f64_select_1_64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x40500000 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x3ff00000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f64_select_1_64: @@ -3906,11 +4013,9 @@ define double @v_mul_f64_select_1_64(i32 %arg, double %x) { ; GFX10-SDAG-LABEL: v_mul_f64_select_1_64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x3ff00000 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v4, 0x40500000, v4, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f64_select_1_64: @@ -3926,10 +4031,9 @@ define double @v_mul_f64_select_1_64(i32 %arg, double %x) { ; GFX11-SDAG-LABEL: v_mul_f64_select_1_64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0x3ff00000 :: v_dual_mov_b32 v3, 0 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v4, 0x40500000, v4, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX11-SDAG-NEXT: 
v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f64_select_1_64: @@ -3950,12 +4054,9 @@ define double @v_mul_f64_select_n1_n64(i32 %arg, double %x) { ; GFX9-SDAG-LABEL: v_mul_f64_select_n1_n64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc0500000 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xbff00000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f64_select_n1_n64: @@ -3972,11 +4073,9 @@ define double @v_mul_f64_select_n1_n64(i32 %arg, double %x) { ; GFX10-SDAG-LABEL: v_mul_f64_select_n1_n64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0xbff00000 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v4, 0xc0500000, v4, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f64_select_n1_n64: @@ -3992,10 +4091,9 @@ define double @v_mul_f64_select_n1_n64(i32 %arg, double %x) { ; GFX11-SDAG-LABEL: v_mul_f64_select_n1_n64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0xbff00000 :: v_dual_mov_b32 v3, 0 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v4, 0xc0500000, v4, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX11-SDAG-NEXT: 
v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f64_select_n1_n64: @@ -4016,12 +4114,9 @@ define double @v_mul_f64_select_128_64(i32 %arg, double %x) { ; GFX9-SDAG-LABEL: v_mul_f64_select_128_64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x40500000 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0x40600000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f64_select_128_64: @@ -4038,11 +4133,9 @@ define double @v_mul_f64_select_128_64(i32 %arg, double %x) { ; GFX10-SDAG-LABEL: v_mul_f64_select_128_64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0x40600000 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v4, 0x40500000, v4, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f64_select_128_64: @@ -4058,10 +4151,9 @@ define double @v_mul_f64_select_128_64(i32 %arg, double %x) { ; GFX11-SDAG-LABEL: v_mul_f64_select_128_64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0x40600000 :: v_dual_mov_b32 v3, 0 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v4, 0x40500000, v4, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX11-SDAG-NEXT: 
v_cndmask_b32_e64 v0, 6, 7, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], v[1:2], v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f64_select_128_64: @@ -4082,12 +4174,9 @@ define double @v_mul_f64_select_n128_n64(i32 %arg, double %x) { ; GFX9-SDAG-LABEL: v_mul_f64_select_n128_n64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc0500000 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xc0600000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f64_select_n128_n64: @@ -4104,11 +4193,9 @@ define double @v_mul_f64_select_n128_n64(i32 %arg, double %x) { ; GFX10-SDAG-LABEL: v_mul_f64_select_n128_n64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0xc0600000 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v4, 0xc0500000, v4, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f64_select_n128_n64: @@ -4124,10 +4211,9 @@ define double @v_mul_f64_select_n128_n64(i32 %arg, double %x) { ; GFX11-SDAG-LABEL: v_mul_f64_select_n128_n64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0xc0600000 :: v_dual_mov_b32 v3, 0 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v4, 0xc0500000, v4, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; 
GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f64_select_n128_n64: @@ -4148,12 +4234,9 @@ define double @v_mul_f64_select_n128_n16(i32 %arg, double %x) { ; GFX9-SDAG-LABEL: v_mul_f64_select_n128_n16: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xc0300000 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v4, 0xc0600000 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX9-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc +; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f64_select_n128_n16: @@ -4170,11 +4253,9 @@ define double @v_mul_f64_select_n128_n16(i32 %arg, double %x) { ; GFX10-SDAG-LABEL: v_mul_f64_select_n128_n16: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v4, 0xc0600000 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v4, 0xc0300000, v4, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f64 v[0:1], v[1:2], v[3:4] +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f64_select_n128_n16: @@ -4190,10 +4271,9 @@ define double @v_mul_f64_select_n128_n16(i32 %arg, double %x) { ; GFX11-SDAG-LABEL: v_mul_f64_select_n128_n16: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, 0xc0600000 :: v_dual_mov_b32 v3, 0 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v4, 0xc0300000, v4, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f64 v[0:1], 
v[1:2], v[3:4] +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], -v[1:2], v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f64_select_n128_n16: @@ -4719,11 +4799,9 @@ define half @v_mul_f16_select_64_1(i32 %arg, half %x) { ; GFX9-SDAG-LABEL: v_mul_f16_select_64_1: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x3c00 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x5400 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f16_select_64_1: @@ -4739,10 +4817,9 @@ define half @v_mul_f16_select_64_1(i32 %arg, half %x) { ; GFX10-SDAG-LABEL: v_mul_f16_select_64_1: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0x5400 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f16_select_64_1: @@ -4757,10 +4834,9 @@ define half @v_mul_f16_select_64_1(i32 %arg, half %x) { ; GFX11-SDAG-LABEL: v_mul_f16_select_64_1: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0x5400 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: 
v_mul_f16_select_64_1: @@ -4781,11 +4857,9 @@ define half @v_mul_f16_select_1_64(i32 %arg, half %x) { ; GFX9-SDAG-LABEL: v_mul_f16_select_1_64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x5400 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x3c00 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f16_select_1_64: @@ -4801,10 +4875,9 @@ define half @v_mul_f16_select_1_64(i32 %arg, half %x) { ; GFX10-SDAG-LABEL: v_mul_f16_select_1_64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0x3c00 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v2, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f16_select_1_64: @@ -4819,10 +4892,9 @@ define half @v_mul_f16_select_1_64(i32 %arg, half %x) { ; GFX11-SDAG-LABEL: v_mul_f16_select_1_64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0x3c00 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v2, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f16_select_1_64: @@ -4843,11 +4915,9 @@ define half @v_mul_f16_select_n1_n64(i32 %arg, half %x) { ; GFX9-SDAG-LABEL: v_mul_f16_select_n1_n64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0xd400 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xbc00 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc +; GFX9-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f16_select_n1_n64: @@ -4863,10 +4933,9 @@ define half @v_mul_f16_select_n1_n64(i32 %arg, half %x) { ; GFX10-SDAG-LABEL: v_mul_f16_select_n1_n64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0xbc00 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v2, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f16_select_n1_n64: @@ -4881,10 +4950,9 @@ define half @v_mul_f16_select_n1_n64(i32 %arg, half %x) { ; GFX11-SDAG-LABEL: v_mul_f16_select_n1_n64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0xbc00 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v2, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f16_select_n1_n64: @@ -4905,11 +4973,9 @@ define half @v_mul_f16_select_128_64(i32 %arg, half %x) { ; GFX9-SDAG-LABEL: v_mul_f16_select_128_64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x5400 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0x5800 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; 
GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc +; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f16_select_128_64: @@ -4925,10 +4991,9 @@ define half @v_mul_f16_select_128_64(i32 %arg, half %x) { ; GFX10-SDAG-LABEL: v_mul_f16_select_128_64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0x5800 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v2, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f16_select_128_64: @@ -4943,10 +5008,9 @@ define half @v_mul_f16_select_128_64(i32 %arg, half %x) { ; GFX11-SDAG-LABEL: v_mul_f16_select_128_64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0x5800 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0x5400, v2, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v1, v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f16_select_128_64: @@ -4967,11 +5031,9 @@ define half @v_mul_f16_select_n128_n64(i32 %arg, half %x) { ; GFX9-SDAG-LABEL: v_mul_f16_select_n128_n64: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0xd400 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xd800 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc +; GFX9-SDAG-NEXT: 
v_ldexp_f16_e64 v0, -v1, v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f16_select_n128_n64: @@ -4987,10 +5049,9 @@ define half @v_mul_f16_select_n128_n64(i32 %arg, half %x) { ; GFX10-SDAG-LABEL: v_mul_f16_select_n128_n64: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0xd800 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v2, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f16_select_n128_n64: @@ -5005,10 +5066,9 @@ define half @v_mul_f16_select_n128_n64(i32 %arg, half %x) { ; GFX11-SDAG-LABEL: v_mul_f16_select_n128_n64: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0xd800 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xd400, v2, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f16_select_n128_n64: @@ -5029,11 +5089,9 @@ define half @v_mul_f16_select_n128_n16(i32 %arg, half %x) { ; GFX9-SDAG-LABEL: v_mul_f16_select_n128_n16: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0xcc00 -; GFX9-SDAG-NEXT: v_mov_b32_e32 v3, 0xd800 ; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX9-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc +; GFX9-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: v_mul_f16_select_n128_n16: @@ -5049,10 +5107,9 @@ define half 
@v_mul_f16_select_n128_n16(i32 %arg, half %x) { ; GFX10-SDAG-LABEL: v_mul_f16_select_n128_n16: ; GFX10-SDAG: ; %bb.0: ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-SDAG-NEXT: v_mov_b32_e32 v2, 0xd800 ; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xcc00, v2, vcc_lo -; GFX10-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo +; GFX10-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-GISEL-LABEL: v_mul_f16_select_n128_n16: @@ -5067,10 +5124,9 @@ define half @v_mul_f16_select_n128_n16(i32 %arg, half %x) { ; GFX11-SDAG-LABEL: v_mul_f16_select_n128_n16: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_mov_b32_e32 v2, 0xd800 ; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e32 v0, 0xcc00, v2, vcc_lo -; GFX11-SDAG-NEXT: v_mul_f16_e32 v0, v1, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 4, 7, vcc_lo +; GFX11-SDAG-NEXT: v_ldexp_f16_e64 v0, -v1, v0 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-GISEL-LABEL: v_mul_f16_select_n128_n16: diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll index 8e04a240d0a1c..b3001819e9aaf 100644 --- a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll @@ -2368,14 +2368,12 @@ define float @v_sqrt_f32_ulp2_contractable_rcp(float %x) { ; SDAG-IEEE: ; %bb.0: ; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 -; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4b800000 ; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SDAG-IEEE-NEXT: v_mul_f32_e32 v0, v0, v1 +; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 24, vcc +; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SDAG-IEEE-NEXT: v_rsq_f32_e32 v0, v0 -; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x45800000 -; SDAG-IEEE-NEXT: 
v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SDAG-IEEE-NEXT: v_mul_f32_e32 v0, v0, v1 +; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc +; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_rcp: @@ -2718,20 +2716,18 @@ define <2 x float> @v_sqrt_v2f32_ulp2_contractable_rcp(<2 x float> %x) { ; SDAG-IEEE: ; %bb.0: ; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 -; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x4b800000 ; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, 1.0, v2, vcc +; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 24, vcc ; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1 -; SDAG-IEEE-NEXT: v_mul_f32_e32 v0, v0, v3 -; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, v2, s[4:5] +; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 24, s[4:5] ; SDAG-IEEE-NEXT: v_rsq_f32_e32 v0, v0 -; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, v1, v2 +; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SDAG-IEEE-NEXT: v_rsq_f32_e32 v1, v1 -; SDAG-IEEE-NEXT: v_mov_b32_e32 v3, 0x45800000 -; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc -; SDAG-IEEE-NEXT: v_mul_f32_e32 v0, v0, v2 -; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 1.0, v3, s[4:5] -; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, v1, v2 +; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 12, vcc +; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 12, s[4:5] +; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 ; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_rcp: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll index 30cc060d05bb1..ac515808a0d8a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll @@ -15,17 +15,17 @@ define amdgpu_kernel void @s_exp2_f32(ptr addrspace(1) %out, float %in) { ; SI-SDAG-NEXT: s_load_dword 
s2, s[4:5], 0xb ; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 ; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v1, s2, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, s2, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-SDAG-NEXT: s_cselect_b32 s4, 0xffffffc0, 0 ; SI-SDAG-NEXT: s_mov_b32 s2, -1 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, v0, s4 ; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-SDAG-NEXT: s_endpgm ; @@ -53,15 +53,15 @@ define amdgpu_kernel void @s_exp2_f32(ptr addrspace(1) %out, float %in) { ; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c ; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; VI-SDAG-NEXT: v_add_f32_e32 v1, s2, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, v1, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, s2, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; VI-SDAG-NEXT: s_cselect_b32 s2, 0xffffffc0, 0 +; VI-SDAG-NEXT: v_ldexp_f32 v2, v0, s2 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 ; 
VI-SDAG-NEXT: flat_store_dword v[0:1], v2 @@ -91,17 +91,17 @@ define amdgpu_kernel void @s_exp2_f32(ptr addrspace(1) %out, float %in) { ; GFX900-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c ; GFX900-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, s2, v1 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v1, v1 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v1, v0 -; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, s2, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; GFX900-SDAG-NEXT: s_cselect_b32 s2, 0xffffffc0, 0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, s2 +; GFX900-SDAG-NEXT: global_store_dword v1, v0, s[0:1] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_exp2_f32: @@ -175,25 +175,26 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s6, -1 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc -; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; SI-SDAG-NEXT: 
v_cndmask_b32_e32 v0, 1.0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v4, s3, v4 -; SI-SDAG-NEXT: v_add_f32_e32 v1, s2, v1 -; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4 -; SI-SDAG-NEXT: v_exp_f32_e32 v3, v1 -; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc ; SI-SDAG-NEXT: s_mov_b32 s4, s0 ; SI-SDAG-NEXT: s_mov_b32 s5, s1 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v4, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v3, v0 +; SI-SDAG-NEXT: s_and_b64 s[0:1], vcc, exec +; SI-SDAG-NEXT: v_add_f32_e32 v2, s3, v2 +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, s2, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_cselect_b32 s0, 0xffffffc0, 0 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v1, v2, s0 +; SI-SDAG-NEXT: s_and_b64 s[0:1], vcc, exec +; SI-SDAG-NEXT: s_cselect_b32 s0, 0xffffffc0, 0 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, v0, s0 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; @@ -225,22 +226,23 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc +; VI-SDAG-NEXT: s_and_b64 s[4:5], vcc, exec +; VI-SDAG-NEXT: v_add_f32_e32 v2, s3, v2 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; VI-SDAG-NEXT: v_add_f32_e32 v4, s3, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v1, 
s2, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_exp_f32_e32 v2, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, v4, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v2, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, s2, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_cselect_b32 s3, 0xffffffc0, 0 +; VI-SDAG-NEXT: v_ldexp_f32 v1, v2, s3 +; VI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; VI-SDAG-NEXT: s_cselect_b32 s2, 0xffffffc0, 0 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, s2 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0 ; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-SDAG-NEXT: s_endpgm @@ -273,23 +275,24 @@ define amdgpu_kernel void @s_exp2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v2, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc +; GFX900-SDAG-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc -; GFX900-SDAG-NEXT: v_add_f32_e32 v4, s3, v4 -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, s2, v1 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v4, v4 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v1 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v4, v3 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v2, v0 -; GFX900-SDAG-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 
v0, 0, v1, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, s3, v3 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, s2, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_cselect_b32 s4, 0xffffffc0, 0 +; GFX900-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; GFX900-SDAG-NEXT: s_cselect_b32 s2, 0xffffffc0, 0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v3, s4 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, s2 +; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_exp2_v2f32: @@ -384,29 +387,31 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; SI-SDAG-NEXT: s_and_b64 s[8:9], vcc, exec ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v1, s1, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc +; SI-SDAG-NEXT: v_exp_f32_e32 v1, v1 +; SI-SDAG-NEXT: s_cselect_b32 s1, 0xffffffc0, 0 +; SI-SDAG-NEXT: s_and_b64 s[8:9], vcc, exec +; SI-SDAG-NEXT: v_add_f32_e32 v3, s0, v3 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v4, s1, v4 +; SI-SDAG-NEXT: v_exp_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc ; SI-SDAG-NEXT: v_add_f32_e32 v0, s2, v0 -; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4 -; SI-SDAG-NEXT: 
v_add_f32_e32 v6, s0, v6 -; SI-SDAG-NEXT: v_exp_f32_e32 v3, v0 -; SI-SDAG-NEXT: v_exp_f32_e32 v6, v6 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc -; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v1, v1, s1 +; SI-SDAG-NEXT: s_cselect_b32 s1, 0xffffffc0, 0 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, v3, s1 +; SI-SDAG-NEXT: s_and_b64 s[0:1], vcc, exec +; SI-SDAG-NEXT: s_cselect_b32 s0, 0xffffffc0, 0 ; SI-SDAG-NEXT: s_mov_b32 s6, -1 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v4, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, v3, v7 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v6, v5 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v2, v2, s0 ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm @@ -446,31 +451,34 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; VI-SDAG-LABEL: s_exp2_v3f32: ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 -; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v2, s2, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 +; VI-SDAG-NEXT: s_and_b64 s[4:5], vcc, exec +; VI-SDAG-NEXT: s_cselect_b32 s4, 0xffffffc0, 0 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v2, v2, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc +; VI-SDAG-NEXT: s_and_b64 s[4:5], vcc, exec ; 
VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; VI-SDAG-NEXT: v_add_f32_e32 v4, s2, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v1, s0, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v6, s1, v6 -; VI-SDAG-NEXT: v_exp_f32_e32 v3, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v6, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, v4, v2 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v3, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, s4 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, v6, v5 -; VI-SDAG-NEXT: v_mov_b32_e32 v4, s5 +; VI-SDAG-NEXT: v_add_f32_e32 v3, s1, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; VI-SDAG-NEXT: v_exp_f32_e32 v3, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v0, s0, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_cselect_b32 s4, 0xffffffc0, 0 +; VI-SDAG-NEXT: s_and_b64 s[0:1], vcc, exec +; VI-SDAG-NEXT: v_ldexp_f32 v1, v3, s4 +; VI-SDAG-NEXT: s_cselect_b32 s0, 0xffffffc0, 0 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_mov_b32_e32 v4, s3 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, s0 +; VI-SDAG-NEXT: v_mov_b32_e32 v3, s2 ; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2] ; VI-SDAG-NEXT: s_endpgm ; @@ -510,29 +518,31 @@ define amdgpu_kernel void @s_exp2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, s2, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 
v2, v2 +; GFX900-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; GFX900-SDAG-NEXT: s_cselect_b32 s2, 0xffffffc0, 0 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v2, s2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc +; GFX900-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; GFX900-SDAG-NEXT: v_add_f32_e32 v4, s2, v4 -; GFX900-SDAG-NEXT: v_add_f32_e32 v6, s1, v6 -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, s0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v4, s1, v4 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, s0, v0 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v4, v4 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v6, v6 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v3, v1 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v4, v2 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v6, v5 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v3, v0 -; GFX900-SDAG-NEXT: global_store_dwordx3 v7, v[0:2], s[6:7] +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_cselect_b32 s2, 0xffffffc0, 0 +; GFX900-SDAG-NEXT: s_and_b64 s[0:1], vcc, exec +; GFX900-SDAG-NEXT: s_cselect_b32 s0, 0xffffffc0, 0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v4, s2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, s0 +; GFX900-SDAG-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_exp2_v3f32: @@ -659,35 +669,38 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 ; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0xd ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000 +; SI-SDAG-NEXT: 
v_mov_b32_e32 v1, 0x42800000 ; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc +; SI-SDAG-NEXT: s_and_b64 s[8:9], vcc, exec ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v2, s7, v2 +; SI-SDAG-NEXT: v_add_f32_e32 v3, s6, v3 +; SI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v4, v3 +; SI-SDAG-NEXT: s_cselect_b32 s8, 0xffffffc0, 0 +; SI-SDAG-NEXT: s_and_b64 s[6:7], vcc, exec +; SI-SDAG-NEXT: s_cselect_b32 s6, 0xffffffc0, 0 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v3, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e64 v3, v2, s8 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v2, v4, s6 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc +; SI-SDAG-NEXT: s_and_b64 s[6:7], vcc, exec ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; SI-SDAG-NEXT: v_add_f32_e32 v4, s7, v4 -; SI-SDAG-NEXT: v_add_f32_e32 v6, s6, v6 -; SI-SDAG-NEXT: v_add_f32_e32 v8, s5, v8 -; SI-SDAG-NEXT: v_add_f32_e32 v1, s4, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v4, s5, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v0, s4, v0 ; SI-SDAG-NEXT: v_exp_f32_e32 v4, v4 -; SI-SDAG-NEXT: v_exp_f32_e32 v6, v6 -; SI-SDAG-NEXT: v_exp_f32_e32 v8, v8 -; SI-SDAG-NEXT: v_exp_f32_e32 v9, v1 -; SI-SDAG-NEXT: s_mov_b32 s2, -1 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, v4, v2 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, v6, v5 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, v8, v7 -; SI-SDAG-NEXT: v_mul_f32_e32 
v0, v9, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: s_cselect_b32 s6, 0xffffffc0, 0 +; SI-SDAG-NEXT: s_and_b64 s[4:5], vcc, exec +; SI-SDAG-NEXT: s_cselect_b32 s4, 0xffffffc0, 0 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v1, v4, s6 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, v0, s4 ; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; SI-SDAG-NEXT: s_endpgm ; @@ -733,34 +746,37 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 ; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc +; VI-SDAG-NEXT: s_and_b64 s[6:7], vcc, exec ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v2, s3, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v3, s2, v3 +; VI-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v4, v3 +; VI-SDAG-NEXT: s_cselect_b32 s6, 0xffffffc0, 0 +; VI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; VI-SDAG-NEXT: s_cselect_b32 s2, 0xffffffc0, 0 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v3, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v3, v2, s6 +; VI-SDAG-NEXT: v_ldexp_f32 v2, v4, s2 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc +; VI-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; VI-SDAG-NEXT: v_add_f32_e32 v4, s3, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v6, s2, v6 -; 
VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v4, s1, v4 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-SDAG-NEXT: v_exp_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_exp_f32_e32 v6, v6 -; VI-SDAG-NEXT: v_add_f32_e32 v8, s1, v8 -; VI-SDAG-NEXT: v_add_f32_e32 v1, s0, v1 -; VI-SDAG-NEXT: v_exp_f32_e32 v8, v8 -; VI-SDAG-NEXT: v_exp_f32_e32 v9, v1 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, v4, v2 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, v6, v5 +; VI-SDAG-NEXT: v_add_f32_e32 v0, s0, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: s_cselect_b32 s2, 0xffffffc0, 0 +; VI-SDAG-NEXT: s_and_b64 s[0:1], vcc, exec +; VI-SDAG-NEXT: v_ldexp_f32 v1, v4, s2 +; VI-SDAG-NEXT: s_cselect_b32 s0, 0xffffffc0, 0 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s4 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, v8, v7 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v9, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, s0 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s5 ; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-SDAG-NEXT: s_endpgm @@ -807,34 +823,37 @@ define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0xc2fc0000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x42800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v3, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc +; GFX900-SDAG-NEXT: s_and_b64 s[4:5], vcc, exec ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v3, vcc +; GFX900-SDAG-NEXT: 
v_cndmask_b32_e32 v3, 0, v1, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, s3, v2 +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, s2, v3 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v5, v3 +; GFX900-SDAG-NEXT: s_cselect_b32 s4, 0xffffffc0, 0 +; GFX900-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec +; GFX900-SDAG-NEXT: s_cselect_b32 s2, 0xffffffc0, 0 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v8, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v9, 0, v3, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v3, v2, s4 +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, v5, s2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc +; GFX900-SDAG-NEXT: s_and_b64 s[2:3], vcc, exec ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; GFX900-SDAG-NEXT: v_add_f32_e32 v5, s3, v5 -; GFX900-SDAG-NEXT: v_add_f32_e32 v7, s2, v7 -; GFX900-SDAG-NEXT: v_add_f32_e32 v9, s1, v9 -; GFX900-SDAG-NEXT: v_add_f32_e32 v1, s0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v5, s1, v5 +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, s0, v0 ; GFX900-SDAG-NEXT: v_exp_f32_e32 v5, v5 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v7, v7 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v9, v9 -; GFX900-SDAG-NEXT: v_exp_f32_e32 v10, v1 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, v5, v2 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, v7, v6 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, v9, v8 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v10, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: s_cselect_b32 s2, 0xffffffc0, 0 +; GFX900-SDAG-NEXT: s_and_b64 s[0:1], vcc, exec +; GFX900-SDAG-NEXT: s_cselect_b32 s0, 0xffffffc0, 0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, v5, s2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, s0 ; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GFX900-SDAG-NEXT: s_endpgm ; @@ -973,19 +992,19 @@ 
define amdgpu_kernel void @s_exp2_v4f32(ptr addrspace(1) %out, <4 x float> %in) } define float @v_exp2_f32(float %in) { -; GCN-SDAG-LABEL: v_exp2_f32: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32: ; GCN-GISEL: ; %bb.0: @@ -1001,6 +1020,34 @@ define float @v_exp2_f32(float %in) { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32: 
+; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1015,19 +1062,19 @@ define float @v_exp2_f32(float %in) { } define float @v_exp2_fabs_f32(float %in) { -; GCN-SDAG-LABEL: v_exp2_fabs_f32: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e64 v0, |v0|, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_fabs_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e64 v0, |v0|, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_fabs_f32: ; GCN-GISEL: ; %bb.0: @@ -1043,6 +1090,34 @@ define float @v_exp2_fabs_f32(float %in) { ; GCN-GISEL-NEXT: 
v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_fabs_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e64 v0, |v0|, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_fabs_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e64 v0, |v0|, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_fabs_f32: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1058,19 +1133,19 @@ define float @v_exp2_fabs_f32(float %in) { } define float @v_exp2_fneg_fabs_f32(float %in) { -; GCN-SDAG-LABEL: v_exp2_fneg_fabs_f32: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0x42fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_sub_f32_e64 v0, v2, |v0| -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: 
v_exp2_fneg_fabs_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_sub_f32_e64 v0, v2, |v0| +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_fneg_fabs_f32: ; GCN-GISEL: ; %bb.0: @@ -1086,6 +1161,34 @@ define float @v_exp2_fneg_fabs_f32(float %in) { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_fneg_fabs_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_sub_f32_e64 v0, v2, |v0| +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_fneg_fabs_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e64 v0, v2, |v0| +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_fneg_fabs_f32: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ 
-1102,19 +1205,19 @@ define float @v_exp2_fneg_fabs_f32(float %in) { } define float @v_exp2_fneg_f32(float %in) { -; GCN-SDAG-LABEL: v_exp2_fneg_f32: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0x42fc0000 -; GCN-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_fneg_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x42fc0000 +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_fneg_f32: ; GCN-GISEL: ; %bb.0: @@ -1130,6 +1233,34 @@ define float @v_exp2_fneg_f32(float %in) { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_fneg_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x42fc0000 +; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX900-SDAG-LABEL: v_exp2_fneg_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x42fc0000 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_fneg_f32: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1145,19 +1276,19 @@ define float @v_exp2_fneg_f32(float %in) { } define float @v_exp2_f32_fast(float %in) { -; GCN-SDAG-LABEL: v_exp2_f32_fast: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_fast: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_fast: ; GCN-GISEL: ; %bb.0: @@ -1173,6 +1304,34 @@ define float 
@v_exp2_f32_fast(float %in) { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_fast: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_fast: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_fast: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1187,19 +1346,19 @@ define float @v_exp2_f32_fast(float %in) { } define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { -; GCN-SDAG-LABEL: v_exp2_f32_unsafe_math_attr: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: 
v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_unsafe_math_attr: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_unsafe_math_attr: ; GCN-GISEL: ; %bb.0: @@ -1215,6 +1374,34 @@ define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_unsafe_math_attr: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_unsafe_math_attr: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, 
v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_unsafe_math_attr: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1229,19 +1416,19 @@ define float @v_exp2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { } define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { -; GCN-SDAG-LABEL: v_exp2_f32_approx_fn_attr: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_approx_fn_attr: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_approx_fn_attr: ; GCN-GISEL: ; %bb.0: @@ -1257,6 +1444,34 @@ define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_approx_fn_attr: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: 
v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_approx_fn_attr: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_approx_fn_attr: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1271,19 +1486,19 @@ define float @v_exp2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" } define float @v_exp2_f32_ninf(float %in) { -; GCN-SDAG-LABEL: v_exp2_f32_ninf: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_ninf: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: 
v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_ninf: ; GCN-GISEL: ; %bb.0: @@ -1299,6 +1514,34 @@ define float @v_exp2_f32_ninf(float %in) { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_ninf: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1313,19 +1556,19 @@ define float @v_exp2_f32_ninf(float %in) { } define float @v_exp2_f32_afn(float %in) { -; GCN-SDAG-LABEL: v_exp2_f32_afn: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: 
v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_afn: ; GCN-GISEL: ; %bb.0: @@ -1341,6 +1584,34 @@ define float @v_exp2_f32_afn(float %in) { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_afn: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: 
v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_afn: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1375,19 +1646,19 @@ define float @v_exp2_f32_afn_daz(float %in) #0 { } define float @v_exp2_f32_afn_dynamic(float %in) #1 { -; GCN-SDAG-LABEL: v_exp2_f32_afn_dynamic: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_afn_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_afn_dynamic: ; GCN-GISEL: ; %bb.0: @@ -1403,6 +1674,34 @@ define float @v_exp2_f32_afn_dynamic(float %in) #1 { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_afn_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 
vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_afn_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_afn_dynamic: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1417,19 +1716,19 @@ define float @v_exp2_f32_afn_dynamic(float %in) #1 { } define float @v_fabs_exp2_f32_afn(float %in) { -; GCN-SDAG-LABEL: v_fabs_exp2_f32_afn: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e64 v0, |v0|, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_fabs_exp2_f32_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; 
SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e64 v0, |v0|, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_fabs_exp2_f32_afn: ; GCN-GISEL: ; %bb.0: @@ -1445,6 +1744,34 @@ define float @v_fabs_exp2_f32_afn(float %in) { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_fabs_exp2_f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e64 v0, |v0|, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_fabs_exp2_f32_afn: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e64 v0, |v0|, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_fabs_exp2_f32_afn: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1480,19 +1807,19 @@ define float @v_exp2_f32_daz(float %in) #0 { } define float @v_exp2_f32_nnan(float %in) { -; GCN-SDAG-LABEL: v_exp2_f32_nnan: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: 
s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_nnan: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_nnan: ; GCN-GISEL: ; %bb.0: @@ -1508,6 +1835,34 @@ define float @v_exp2_f32_nnan(float %in) { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_nnan: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_nnan: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 
0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_nnan: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1542,19 +1897,19 @@ define float @v_exp2_f32_nnan_daz(float %in) #0 { } define float @v_exp2_f32_nnan_dynamic(float %in) #1 { -; GCN-SDAG-LABEL: v_exp2_f32_nnan_dynamic: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_nnan_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_nnan_dynamic: ; GCN-GISEL: ; %bb.0: @@ -1570,6 +1925,34 @@ define float @v_exp2_f32_nnan_dynamic(float %in) #1 { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_nnan_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_nnan_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_nnan_dynamic: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1604,19 +1987,19 @@ define float @v_exp2_f32_ninf_daz(float %in) #0 { } define float @v_exp2_f32_ninf_dynamic(float %in) #1 { -; GCN-SDAG-LABEL: v_exp2_f32_ninf_dynamic: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_ninf_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 
0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_ninf_dynamic: ; GCN-GISEL: ; %bb.0: @@ -1632,6 +2015,34 @@ define float @v_exp2_f32_ninf_dynamic(float %in) #1 { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_ninf_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_ninf_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_ninf_dynamic: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1646,19 +2057,19 @@ define float @v_exp2_f32_ninf_dynamic(float %in) #1 { } define float @v_exp2_f32_nnan_ninf(float %in) { -; 
GCN-SDAG-LABEL: v_exp2_f32_nnan_ninf: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_nnan_ninf: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_nnan_ninf: ; GCN-GISEL: ; %bb.0: @@ -1674,6 +2085,34 @@ define float @v_exp2_f32_nnan_ninf(float %in) { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_nnan_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_nnan_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_nnan_ninf: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1708,19 +2147,19 @@ define float @v_exp2_f32_nnan_ninf_daz(float %in) #0 { } define float @v_exp2_f32_nnan_ninf_dynamic(float %in) #1 { -; GCN-SDAG-LABEL: v_exp2_f32_nnan_ninf_dynamic: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_nnan_ninf_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_nnan_ninf_dynamic: ; GCN-GISEL: ; %bb.0: @@ -1736,6 +2175,34 @@ define float 
@v_exp2_f32_nnan_ninf_dynamic(float %in) #1 { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_nnan_ninf_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_nnan_ninf_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_nnan_ninf_dynamic: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1770,19 +2237,19 @@ define float @v_exp2_f32_fast_daz(float %in) #0 { } define float @v_exp2_f32_dynamic_mode(float %in) #1 { -; GCN-SDAG-LABEL: v_exp2_f32_dynamic_mode: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, 
v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_dynamic_mode: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_dynamic_mode: ; GCN-GISEL: ; %bb.0: @@ -1798,6 +2265,34 @@ define float @v_exp2_f32_dynamic_mode(float %in) #1 { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_dynamic_mode: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_dynamic_mode: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; 
GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_dynamic_mode: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -1912,9 +2407,9 @@ define float @v_exp2_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; SI-GISEL-LABEL: v_exp2_f32_from_fpext_math_f16: @@ -1978,9 +2473,9 @@ define float @v_exp2_f32_from_fpext_bf16(bfloat %src) { ; SI-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; SI-NEXT: v_add_f32_e32 v0, v0, v2 ; SI-NEXT: v_exp_f32_e32 v0, v0 -; SI-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; SI-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-NEXT: v_not_b32_e32 v1, 63 +; SI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-NEXT: s_setpc_b64 s[30:31] ; ; VI-LABEL: v_exp2_f32_from_fpext_bf16: @@ -1993,9 +2488,9 @@ define float @v_exp2_f32_from_fpext_bf16(bfloat %src) { ; VI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; VI-NEXT: v_add_f32_e32 v0, v0, v1 ; VI-NEXT: v_exp_f32_e32 v0, v0 -; VI-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; VI-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-NEXT: v_not_b32_e32 v1, 63 +; VI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-NEXT: s_setpc_b64 s[30:31] ; ; GFX900-LABEL: v_exp2_f32_from_fpext_bf16: @@ -2008,9 +2503,9 @@ define float @v_exp2_f32_from_fpext_bf16(bfloat %src) { ; GFX900-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GFX900-NEXT: v_add_f32_e32 v0, v0, v2 ; GFX900-NEXT: v_exp_f32_e32 v0, v0 -; GFX900-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GFX900-NEXT: v_cndmask_b32_e32 v1, 
1.0, v1, vcc -; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-NEXT: v_not_b32_e32 v1, 63 +; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-NEXT: s_setpc_b64 s[30:31] ; ; R600-LABEL: v_exp2_f32_from_fpext_bf16: @@ -2850,19 +3345,19 @@ define <3 x half> @v_exp2_v3f16_afn(<3 x half> %in) { } define float @v_exp2_f32_contract(float %in) { -; GCN-SDAG-LABEL: v_exp2_f32_contract: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_contract: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_contract: ; GCN-GISEL: ; %bb.0: @@ -2878,6 +3373,34 @@ define float @v_exp2_f32_contract(float %in) { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_contract: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: 
v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_contract: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_contract: ; R600: ; %bb.0: ; R600-NEXT: CF_END @@ -2912,19 +3435,19 @@ define float @v_exp2_f32_contract_daz(float %in) #0 { } define float @v_exp2_f32_contract_nnan_ninf(float %in) { -; GCN-SDAG-LABEL: v_exp2_f32_contract_nnan_ninf: -; GCN-SDAG: ; %bb.0: -; GCN-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 -; GCN-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GCN-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 -; GCN-SDAG-NEXT: v_exp_f32_e32 v0, v0 -; GCN-SDAG-NEXT: v_mov_b32_e32 v1, 0x1f800000 -; GCN-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 -; GCN-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_exp2_f32_contract_nnan_ninf: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; 
SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GCN-GISEL-LABEL: v_exp2_f32_contract_nnan_ninf: ; GCN-GISEL: ; %bb.0: @@ -2940,6 +3463,34 @@ define float @v_exp2_f32_contract_nnan_ninf(float %in) { ; GCN-GISEL-NEXT: v_mul_f32_e32 v0, v0, v1 ; GCN-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_exp2_f32_contract_nnan_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; VI-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_not_b32_e32 v1, 63 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_exp2_f32_contract_nnan_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0xc2fc0000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x42800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX900-SDAG-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_exp_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_not_b32_e32 v1, 63 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; R600-LABEL: v_exp2_f32_contract_nnan_ninf: ; R600: ; %bb.0: ; R600-NEXT: CF_END diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll index 279ffeab51fb3..218e41faa703d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll @@ -17,19 +17,19 @@ define amdgpu_kernel 
void @s_log_f32(ptr addrspace(1) %out, float %in) { ; SI-SDAG-NEXT: s_load_dword s0, s[4:5], 0xb ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: s_mov_b32 s1, 0x3377d1cf ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, s0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s0, 0x3f317217 ; SI-SDAG-NEXT: s_mov_b32 s6, -1 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, -v1 -; SI-SDAG-NEXT: s_mov_b32 s0, 0x3377d1cf -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, v2 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s1, v2 ; SI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 @@ -73,11 +73,11 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; VI-SDAG-NEXT: s_load_dword s0, s[4:5], 0x2c ; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v0, s0, v0 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -134,27 +134,27 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX900-SDAG-NEXT: 
s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x800000 ; GFX900-SDAG-NEXT: s_mov_b32 s1, 0x3377d1cf +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0 -; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x3f317217 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s0, -v2 -; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s1, v3 -; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x7f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s0, -v2 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s1, v3 ; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x41b17218 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 -; GFX900-SDAG-NEXT: global_store_dword v1, v0, s[2:3] +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: global_store_dword v0, v1, s[2:3] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_log_f32: @@ -190,21 +190,23 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] @@ -314,38 +316,39 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: 
s_mov_b32 s8, 0x3377d1cf ; SI-SDAG-NEXT: s_mov_b32 s9, 0x7f800000 +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v2 -; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s3, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s3, 0x3f317217 ; SI-SDAG-NEXT: s_mov_b32 s4, s0 ; SI-SDAG-NEXT: s_mov_b32 s5, s1 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2 -; SI-SDAG-NEXT: v_fma_f32 v4, v2, s3, -v3 -; SI-SDAG-NEXT: v_fma_f32 v4, v2, s8, v4 -; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s9 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s3, -v2 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s8, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s9 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, s2, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41b17218 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v0 -; SI-SDAG-NEXT: v_fma_f32 v4, v0, s3, -v2 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v4, v0, s3, -v3 ; SI-SDAG-NEXT: v_fma_f32 v4, v0, s8, v4 -; 
SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s9 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] -; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] ; SI-SDAG-NEXT: s_mov_b32 s6, -1 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -394,43 +397,44 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: s_mov_b32 s2, 0x7f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2 -; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s2 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s7, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, 
v4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v5, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; VI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v0, s6, v0 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41b17218 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1 -; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v2 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v3 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v4 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v3 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s2 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 @@ -488,36 +492,37 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; GFX900-SDAG: ; %bb.0: ; 
GFX900-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x3f317217 ; GFX900-SDAG-NEXT: s_mov_b32 s3, 0x3377d1cf +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s11, v3 -; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s11, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v3 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s2, -v4 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s3, v5 -; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v3|, s4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s2, -v3 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s3, v4 +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s10, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, s10, v0 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v3, v1 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v5, 
v0, s2, -v3 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x41b17218 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s2, -v4 ; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s3, v5 -; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3 ; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX900-SDAG-NEXT: s_endpgm @@ -564,31 +569,37 @@ define amdgpu_kernel void @s_log_v2f32(ptr addrspace(1) %out, <2 x float> %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s3 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s2 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s3 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s3, v0 :: v_dual_mul_f32 v1, s2, v1 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: 
v_ldexp_f32 v1, s2, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3f317217, v0 :: v_dual_mul_f32 v3, 0x3f317217, v1 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v3, 0x3f317217, v1 :: v_dual_lshlrev_b32 v0, 5, v0 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, s3, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_fma_f32 v5, 0x3f317217, v1, -v3 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v5, 0x3377d1cf, v1 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_dual_add_f32 v3, v3, v5 :: v_dual_mul_f32 v2, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x41b17218, s5 ; GFX1100-SDAG-NEXT: v_fma_f32 v4, 0x3f317217, v0, -v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_fma_f32 v5, 0x3f317217, v1, -v3 -; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v4, 0x3377d1cf, v0 :: v_dual_fmac_f32 v5, 0x3377d1cf, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v4, 0x3377d1cf, v0 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s4 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x41b17218, s5 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX1100-SDAG-NEXT: 
v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| ; GFX1100-SDAG-NEXT: v_dual_cndmask_b32 v2, v1, v3 :: v_dual_mov_b32 v3, 0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v0, v4 :: v_dual_sub_f32 v0, v2, v5 ; GFX1100-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1] ; GFX1100-SDAG-NEXT: s_endpgm @@ -742,49 +753,51 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s11, 0x3377d1cf +; SI-SDAG-NEXT: s_mov_b32 s12, 0x7f800000 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s9, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s9, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s9, 0x3f317217 -; SI-SDAG-NEXT: s_mov_b32 s12, 0x7f800000 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v1 -; SI-SDAG-NEXT: v_fma_f32 v4, v1, s9, -v3 -; SI-SDAG-NEXT: v_fma_f32 v4, v1, s11, v4 -; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s9, -v2 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s11, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s12 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v2, s[0:1] -; SI-SDAG-NEXT: v_mul_f32_e32 v3, s8, v3 -; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 -; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218 -; SI-SDAG-NEXT: v_cndmask_b32_e32 
v5, 0, v4, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, s8, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41b17218 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5 -; SI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v3 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s10, v0 -; SI-SDAG-NEXT: v_fma_f32 v6, v3, s9, -v5 -; SI-SDAG-NEXT: v_log_f32_e32 v2, v0 -; SI-SDAG-NEXT: v_fma_f32 v6, v3, s11, v6 -; SI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s12 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, v5, s[2:3] -; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2 -; SI-SDAG-NEXT: v_fma_f32 v5, v2, s9, -v3 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 +; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-SDAG-NEXT: v_fma_f32 v5, v2, s9, -v4 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 ; SI-SDAG-NEXT: v_fma_f32 v5, v2, s11, v5 -; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s12 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc -; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, s10, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; SI-SDAG-NEXT: v_log_f32_e32 v5, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v2|, s12 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[2:3] +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v5 +; SI-SDAG-NEXT: v_fma_f32 v4, v5, s9, -v2 +; SI-SDAG-NEXT: v_fma_f32 v4, v5, s11, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; 
SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v5|, s12 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[0:1] +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -847,47 +860,49 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v2, s10, v2 -; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s6 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s10, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v5, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, 
v2, v3 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s6 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] -; VI-SDAG-NEXT: v_mul_f32_e32 v3, s9, v3 -; VI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v2, s9, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v3, v2 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5 -; VI-SDAG-NEXT: v_and_b32_e32 v5, 0xfffff000, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s8, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v6, v3, v5 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317000, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v5 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0 -; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v1, v2 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v5, v3, v1 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, s8, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5 -; VI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v6, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v5 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s6 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v5, s[2:3] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, 
s[2:3] ; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 ; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v0 @@ -978,39 +993,41 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; GFX900-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s10, v2 -; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s10, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX900-SDAG-NEXT: s_mov_b32 s10, 0x7f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s4, -v4 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s5, v5 -; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s10 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s4, -v2 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s5, v4 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s10 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v1, s[0:1] -; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s9, v4 -; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4 -; GFX900-SDAG-NEXT: 
v_mov_b32_e32 v5, 0x41b17218 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, s9, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s8, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317217, v4 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s4, -v6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v1, v2 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v4 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, s8, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v6, v4, s4, -v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s5, v7 -; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v6, v7 +; GFX900-SDAG-NEXT: v_fma_f32 v6, v4, s5, v6 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v6 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s10 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v4, v6, s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[2:3] ; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] ; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0 @@ -1079,48 +1096,54 @@ define amdgpu_kernel void @s_log_v3f32(ptr addrspace(1) %out, <3 x float> %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s1 -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | 
instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s6 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s7 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s6 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s3 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, 0x41b17218, s6 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s1, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v2, s0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v5, 0x3f317217, v2 :: v_dual_lshlrev_b32 v0, 5, v0 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, s2, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_fma_f32 v8, 0x3f317217, v2, -v5 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v2, s0, v2 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 -; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v8, 0x3377d1cf, v2 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v5, v5, v8 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v0 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v1 ; 
GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v6, 0x3f317217, v0, -v3 +; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v6, 0x3377d1cf, v0 :: v_dual_lshlrev_b32 v1, 5, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v1, s1, v1 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v3, v3, v6 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 0x41b17218, s7 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v2 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v1 ; GFX1100-SDAG-NEXT: v_fma_f32 v7, 0x3f317217, v1, -v4 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v6, 0x3377d1cf, v0 -; GFX1100-SDAG-NEXT: v_fma_f32 v8, 0x3f317217, v2, -v5 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v7, 0x3377d1cf, v1 -; GFX1100-SDAG-NEXT: v_add_f32_e32 v3, v3, v6 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 0x41b17218, s7 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v8, 0x3377d1cf, v2 :: v_dual_cndmask_b32 v1, v1, v4 -; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v8 :: 
v_dual_mov_b32 v4, 0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v1, v1, v10 +; GFX1100-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v10 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc_lo ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v2, v0, v9 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -1343,60 +1366,63 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: s_mov_b32 s12, 0x3377d1cf +; SI-SDAG-NEXT: s_mov_b32 s13, 0x7f800000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v2, s11, v2 -; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s11, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s11, 0x3f317217 -; SI-SDAG-NEXT: s_mov_b32 s13, 0x7f800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2 -; SI-SDAG-NEXT: v_fma_f32 v4, v2, s11, -v3 -; SI-SDAG-NEXT: v_fma_f32 v4, v2, s12, v4 -; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s13 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218 +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s11, -v2 +; SI-SDAG-NEXT: 
v_fma_f32 v3, v1, s12, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] -; SI-SDAG-NEXT: v_mul_f32_e32 v3, s10, v3 -; SI-SDAG-NEXT: v_log_f32_e32 v4, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, s10, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v3 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v4 -; SI-SDAG-NEXT: v_fma_f32 v6, v4, s11, -v2 -; SI-SDAG-NEXT: v_fma_f32 v6, v4, s12, v6 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v6, s9, v6 -; SI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s13 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3] -; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v2 +; SI-SDAG-NEXT: v_fma_f32 v5, v2, s11, -v1 +; SI-SDAG-NEXT: v_fma_f32 v5, v2, s12, v5 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v5 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v5, s9, v5 +; SI-SDAG-NEXT: v_log_f32_e32 v5, v5 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v2|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[2:3] +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1] ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v6 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0 -; 
SI-SDAG-NEXT: v_fma_f32 v7, v6, s11, -v4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v1, v2 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v5 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, s8, v0 +; SI-SDAG-NEXT: v_fma_f32 v6, v5, s11, -v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_fma_f32 v7, v6, s12, v7 -; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s13 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3] -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317217, v0 -; SI-SDAG-NEXT: v_fma_f32 v6, v0, s11, -v4 +; SI-SDAG-NEXT: v_fma_f32 v6, v5, s12, v6 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v6 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[2:3] +; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5 +; SI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v0 +; SI-SDAG-NEXT: v_fma_f32 v6, v0, s11, -v5 ; SI-SDAG-NEXT: v_fma_f32 v6, v0, s12, v6 -; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 +; SI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s13 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] -; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] ; SI-SDAG-NEXT: s_mov_b32 s6, -1 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4 ; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 @@ -1470,78 +1496,81 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; VI-SDAG-NEXT: 
s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v2, s11, v2 -; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317000, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s6 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s11, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3805fdf4, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v5, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s6 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] -; VI-SDAG-NEXT: v_mul_f32_e32 v3, s10, v3 -; VI-SDAG-NEXT: v_log_f32_e32 v4, v3 -; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3 -; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v4 -; VI-SDAG-NEXT: v_sub_f32_e32 v6, v4, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: 
v_ldexp_f32 v2, s10, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41b17218 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v3 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v2 +; VI-SDAG-NEXT: v_sub_f32_e32 v5, v2, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317000, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3805fdf4, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v6, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; VI-SDAG-NEXT: v_ldexp_f32 v5, s9, v5 +; VI-SDAG-NEXT: v_log_f32_e32 v5, v5 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v2|, s6 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[2:3] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1] +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v1, v2 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; VI-SDAG-NEXT: v_sub_f32_e32 v6, v5, v1 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317000, v6 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, s8, v0 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 -; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317000, v2 -; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 -; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v6, s9, v6 -; VI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s6 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3] -; VI-SDAG-NEXT: 
v_cndmask_b32_e64 v4, 0, v5, s[0:1] -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4 -; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v6 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v7, v6, v4 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3f317000, v7 -; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3805fdf4, v7 -; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x3805fdf4, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0 -; VI-SDAG-NEXT: v_add_f32_e32 v7, v9, v7 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s6 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3] -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 -; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v6, v0, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v6 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, s6 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[2:3] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5 +; VI-SDAG-NEXT: v_and_b32_e32 v5, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v6, v0, v5 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317000, v6 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3805fdf4, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3805fdf4, v5 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3f317000, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317000, v5 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, 
s[0:1] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s4 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s5 @@ -1635,60 +1664,63 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; GFX900-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s11, v2 -; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s11, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX900-SDAG-NEXT: s_mov_b32 s11, 0x7f800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x41b17218 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3f317217, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s4, -v3 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s5, v5 -; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s11 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x41b17218 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s4, -v2 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s5, v3 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 
1.0, v1, s[0:1] -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s10, v3 -; GFX900-SDAG-NEXT: v_log_f32_e32 v5, v3 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, s10, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v3 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3f317217, v5 -; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s4, -v2 -; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s5, v7 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v7 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s9, v7 -; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v7 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, s11 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[2:3] -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v6, v2, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v6, v2, s5, v6 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v6, 5, v6 +; GFX900-SDAG-NEXT: v_ldexp_f32 v6, s9, v6 +; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v2|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v5, s[0:1] ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v7 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s4, -v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; GFX900-SDAG-NEXT: 
v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v1, v2 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v6 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, s8, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v6, s4, -v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s5, v8 -; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v8 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, s11 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v5, s[2:3] -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v6, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3f317217, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s4, -v5 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v6, s5, v7 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v7 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v6 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317217, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s4, -v6 ; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s5, v7 -; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v7 +; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v6, v7 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s11 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1] -; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[0:1] ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5 ; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GFX900-SDAG-NEXT: s_endpgm @@ -1760,56 +1792,65 @@ define amdgpu_kernel void @s_log_v4f32(ptr addrspace(1) %out, <4 x float> %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s3 -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2 
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s1 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s3 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s6 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s7 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s8 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s9 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s6 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x41b17218, s6 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s3, v0 :: v_dual_mul_f32 v1, s2, v1 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s1, v2 :: v_dual_mul_f32 v3, s0, v3 -; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(TRANS32_DEP_3) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x41b17218, s7 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v14, 0, 0x41b17218, s8 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v15, 0, 0x41b17218, s9 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_mul_f32 v6, 0x3f317217, v1 +; GFX1100-SDAG-NEXT: 
v_ldexp_f32 v2, s1, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v3, s0, v3 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v7, 0x3f317217, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, s3, v0 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v8, 0x3f317217, v3 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1100-SDAG-NEXT: v_fma_f32 v12, 0x3f317217, v2, -v7 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_fma_f32 v13, 0x3f317217, v3, -v8 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v7, v7, v12 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v7, 0x3f317217, v2 :: v_dual_mul_f32 v8, 0x3f317217, v3 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v5, 0x3f317217, v0 :: v_dual_add_f32 v8, v8, v13 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v6, 0x3f317217, v1 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-SDAG-NEXT: v_fma_f32 v10, 0x3f317217, v0, -v5 ; GFX1100-SDAG-NEXT: v_fma_f32 v11, 0x3f317217, v1, -v6 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_fma_f32 v12, 0x3f317217, v2, -v7 -; GFX1100-SDAG-NEXT: 
v_fma_f32 v13, 0x3f317217, v3, -v8 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v10, 0x3377d1cf, v0 :: v_dual_fmac_f32 v11, 0x3377d1cf, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v12, 0x3377d1cf, v2 :: v_dual_fmac_f32 v13, 0x3377d1cf, v3 ; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v5, v2, v7, vcc_lo ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_sub_f32 v2, v1, v9 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v6, v3, v8, vcc_lo ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v3, v0, v4 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v5, v14 :: v_dual_sub_f32 v0, v6, v15 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-SDAG-NEXT: global_store_b128 v7, v[0:3], s[0:1] @@ -2062,10 +2103,10 @@ define float @v_log_f32(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; 
SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -2108,10 +2149,10 @@ define float @v_log_f32(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -2160,10 +2201,10 @@ define float @v_log_f32(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -2206,21 +2247,22 @@ define float @v_log_f32(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; 
GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2264,10 +2306,10 @@ define float @v_log_fabs_f32(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, |v0|, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -2310,10 +2352,10 @@ define float @v_log_fabs_f32(float %in) { ; VI-SDAG: ; 
%bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -2362,10 +2404,10 @@ define float @v_log_fabs_f32(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -2409,20 +2451,22 @@ define float @v_log_fabs_f32(float %in) { ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v1 ; GFX1100-SDAG-NEXT: 
v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2467,10 +2511,10 @@ define float @v_log_fneg_fabs_f32(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, -|v0|, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -2513,10 +2557,10 @@ define float @v_log_fneg_fabs_f32(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -2565,10 +2609,10 @@ define float @v_log_fneg_fabs_f32(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -2612,20 +2656,22 @@ define float @v_log_fneg_fabs_f32(float %in) { ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; 
GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, s0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2671,10 +2717,10 @@ define float @v_log_fneg_f32(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, -v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -2717,10 +2763,10 @@ define float @v_log_fneg_f32(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, -v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -2769,10 +2815,10 @@ define 
float @v_log_fneg_f32(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -2815,21 +2861,22 @@ define float @v_log_fneg_f32(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, -v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | 
instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2875,9 +2922,9 @@ define float @v_log_f32_fast(float %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -2902,9 +2949,9 @@ define float @v_log_f32_fast(float %in) { ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -2929,9 +2976,9 @@ define float @v_log_f32_fast(float %in) { ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: 
v_ldexp_f32 v0, v0, v2 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -2955,10 +3002,12 @@ define float @v_log_f32_fast(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1 @@ -2993,9 +3042,9 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3020,9 +3069,9 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; 
VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3047,9 +3096,9 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3073,10 +3122,12 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1 @@ -3111,9 +3162,9 @@ define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { ; SI-SDAG-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3138,9 +3189,9 @@ define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3165,9 +3216,9 @@ define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3191,10 +3242,12 @@ define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { ; GFX1100-SDAG: ; %bb.0: ; 
GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1 @@ -3228,10 +3281,10 @@ define float @v_log_f32_ninf(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -3274,10 +3327,10 @@ define float @v_log_f32_ninf(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ 
-3326,10 +3379,10 @@ define float @v_log_f32_ninf(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -3372,21 +3425,22 @@ define float @v_log_f32_ninf(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3431,9 +3485,9 @@ define float @v_log_f32_afn(float %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3458,9 +3512,9 @@ define float @v_log_f32_afn(float %in) { ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3485,9 +3539,9 @@ define float @v_log_f32_afn(float %in) { ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; 
GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3511,10 +3565,12 @@ define float @v_log_f32_afn(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1 @@ -3578,9 +3634,9 @@ define float @v_log_f32_afn_dynamic(float %in) #1 { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3605,9 +3661,9 @@ define float @v_log_f32_afn_dynamic(float %in) #1 { ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; 
VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3632,9 +3688,9 @@ define float @v_log_f32_afn_dynamic(float %in) #1 { ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3658,10 +3714,12 @@ define float @v_log_f32_afn_dynamic(float %in) #1 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1 @@ -3696,9 +3754,9 @@ define float @v_fabs_log_f32_afn(float %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; SI-SDAG-NEXT: 
v_mov_b32_e32 v2, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, |v0|, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3723,9 +3781,9 @@ define float @v_fabs_log_f32_afn(float %in) { ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3750,9 +3808,9 @@ define float @v_fabs_log_f32_afn(float %in) { ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc1b17218 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3777,10 +3835,11 @@ define float @v_fabs_log_f32_afn(float %in) { ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: 
v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1 @@ -3956,10 +4015,10 @@ define float @v_log_f32_nnan(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -4002,10 +4061,10 @@ define float @v_log_f32_nnan(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -4054,10 +4113,10 @@ define float @v_log_f32_nnan(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -4100,21 +4159,22 @@ define float @v_log_f32_nnan(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, 
vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -4298,10 +4358,10 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -4344,10 +4404,10 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -4396,10 +4456,10 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; 
GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -4442,21 +4502,22 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -4640,10 +4701,10 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 
0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -4686,10 +4747,10 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -4738,10 +4799,10 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -4784,21 +4845,22 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: 
v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -4842,10 +4904,10 @@ define float @v_log_f32_nnan_ninf(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: 
v_mul_f32_e32 v1, 0x3f317217, v0 @@ -4882,10 +4944,10 @@ define float @v_log_f32_nnan_ninf(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -4928,10 +4990,10 @@ define float @v_log_f32_nnan_ninf(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -4968,18 +5030,20 @@ define float @v_log_f32_nnan_ninf(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | 
instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3377d1cf, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -5123,10 +5187,10 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -5163,10 +5227,10 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 
v0, v0 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -5209,10 +5273,10 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -5249,18 +5313,20 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3377d1cf, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | 
instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -5330,10 +5396,10 @@ define float @v_log_f32_dynamic_mode(float %in) #1 { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -5376,10 +5442,10 @@ define float @v_log_f32_dynamic_mode(float %in) #1 { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -5428,10 +5494,10 @@ define float @v_log_f32_dynamic_mode(float %in) #1 { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 
v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -5474,21 +5540,22 @@ define float @v_log_f32_dynamic_mode(float %in) #1 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -6007,17 +6074,17 @@ define float @v_log_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; SI-SDAG-NEXT: 
v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3377d1cf ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3377d1cf -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 @@ -6179,10 +6246,10 @@ define float @v_log_f32_from_fpext_bf16(bfloat %src) { ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, 0x800000 -; SI-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-NEXT: v_log_f32_e32 v0, v0 ; SI-NEXT: s_mov_b32 s4, 0x3f317217 ; SI-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -6203,10 +6270,10 @@ define float @v_log_f32_from_fpext_bf16(bfloat %src) { ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; VI-NEXT: s_mov_b32 s4, 0x800000 -; VI-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-NEXT: v_ldexp_f32 v0, 
v0, v1 ; VI-NEXT: v_log_f32_e32 v0, v0 ; VI-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -6230,10 +6297,10 @@ define float @v_log_f32_from_fpext_bf16(bfloat %src) { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-NEXT: v_log_f32_e32 v0, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x3f317217 ; GFX900-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 @@ -6255,20 +6322,22 @@ define float @v_log_f32_from_fpext_bf16(bfloat %src) { ; GFX1100-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0 ; GFX1100-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) ; GFX1100-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll index df880164b196b..fd50d1b60fbd1 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll @@ -17,19 +17,19 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; SI-SDAG-NEXT: s_load_dword s0, s[4:5], 0xb ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; SI-SDAG-NEXT: s_mov_b32 s1, 0x3284fbcf ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, s0, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s0, 0x3e9a209a ; SI-SDAG-NEXT: s_mov_b32 s6, -1 ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, -v1 -; SI-SDAG-NEXT: s_mov_b32 s0, 0x3284fbcf -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s0, v2 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s1, v2 ; SI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 @@ -73,11 +73,11 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; VI-SDAG-NEXT: s_load_dword s0, s[4:5], 0x2c ; VI-SDAG-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: 
v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v0, s0, v0 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s0, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -134,27 +134,27 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_load_dword s0, s[4:5], 0x2c ; GFX900-SDAG-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x800000 ; GFX900-SDAG-NEXT: s_mov_b32 s1, 0x3284fbcf +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0 -; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x3e9a209a -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s0, -v2 -; GFX900-SDAG-NEXT: v_fma_f32 v3, v0, s1, v3 -; GFX900-SDAG-NEXT: s_mov_b32 s0, 0x7f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s0, -v2 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s1, v3 ; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s0 -; GFX900-SDAG-NEXT: 
v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x411a209b ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 -; GFX900-SDAG-NEXT: global_store_dword v1, v0, s[2:3] +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v2 +; GFX900-SDAG-NEXT: global_store_dword v0, v1, s[2:3] ; GFX900-SDAG-NEXT: s_endpgm ; ; GFX900-GISEL-LABEL: s_log10_f32: @@ -190,21 +190,23 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s2, 0x800000, s0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s2 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, s0, v0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, s0, v0 ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0 -; GFX1100-SDAG-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] @@ -314,38 +316,39 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: s_mov_b32 s8, 0x3284fbcf ; SI-SDAG-NEXT: s_mov_b32 s9, 0x7f800000 +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v2, s3, v2 -; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s3, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s3, 0x3e9a209a ; SI-SDAG-NEXT: s_mov_b32 s4, s0 ; SI-SDAG-NEXT: s_mov_b32 s5, s1 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v2 -; SI-SDAG-NEXT: v_fma_f32 v4, v2, s3, -v3 -; SI-SDAG-NEXT: v_fma_f32 v4, v2, s8, v4 -; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s9 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s3, -v2 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s8, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s9 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s2, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 +; SI-SDAG-NEXT: 
v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, s2, v0 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x411a209b -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v0 -; SI-SDAG-NEXT: v_fma_f32 v4, v0, s3, -v2 +; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v4, v0, s3, -v3 ; SI-SDAG-NEXT: v_fma_f32 v4, v0, s8, v4 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s9 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] -; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] ; SI-SDAG-NEXT: s_mov_b32 s6, -1 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -394,43 +397,44 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: s_mov_b32 s2, 0x7f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v2, s7, v2 -; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 -; VI-SDAG-NEXT: 
v_mul_f32_e32 v3, 0x3e9a2000, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s2 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s7, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v5, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s6, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; VI-SDAG-NEXT: v_mul_f32_e32 v0, s6, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; VI-SDAG-NEXT: v_ldexp_f32 v0, s6, v0 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x411a209b -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v1, v2, v1 -; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v2 +; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v2, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 +; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v4, v0, v3 ; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v4 ; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v3 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 ; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 -; VI-SDAG-NEXT: 
v_mul_f32_e32 v2, 0x3e9a2000, v2 -; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s2 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[0:1] ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s4 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 @@ -488,36 +492,37 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: s_mov_b32 s2, 0x3e9a209a ; GFX900-SDAG-NEXT: s_mov_b32 s3, 0x3284fbcf +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s11, v3 -; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3 -; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s11, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v3 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s2, -v4 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v3, s3, v5 -; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v3|, s4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s2, -v3 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s3, v4 +; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; GFX900-SDAG-NEXT: 
v_cmp_lt_f32_e64 s[0:1], |v1|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s10, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, s10, v0 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0x411a209b -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v3, v1 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s2, -v3 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x411a209b +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s2, -v4 ; GFX900-SDAG-NEXT: v_fma_f32 v5, v0, s3, v5 -; GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[0:1] ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3 ; GFX900-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9] ; GFX900-SDAG-NEXT: s_endpgm @@ -564,31 +569,37 @@ define amdgpu_kernel void @s_log10_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s3 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s2 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s3 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: 
v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s4 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s5 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s3, v0 :: v_dual_mul_f32 v1, s2, v1 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s5 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, 0x3e9a209a, v0 :: v_dual_mul_f32 v3, 0x3e9a209a, v1 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v3, 0x3e9a209a, v1 :: v_dual_lshlrev_b32 v0, 5, v0 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, s3, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_fma_f32 v5, 0x3e9a209a, v1, -v3 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v5, 0x3284fbcf, v1 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_dual_add_f32 v3, v3, v5 :: v_dual_mul_f32 v2, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x411a209b, s5 ; GFX1100-SDAG-NEXT: v_fma_f32 v4, 0x3e9a209a, v0, -v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_fma_f32 v5, 0x3e9a209a, v1, -v3 -; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v4, 0x3284fbcf, v0 :: v_dual_fmac_f32 v5, 
0x3284fbcf, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_dual_add_f32 v2, v2, v4 :: v_dual_add_f32 v3, v3, v5 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v4, 0x3284fbcf, v0 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s4 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x411a209b, s5 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| ; GFX1100-SDAG-NEXT: v_dual_cndmask_b32 v2, v1, v3 :: v_dual_mov_b32 v3, 0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v0, v4 :: v_dual_sub_f32 v0, v2, v5 ; GFX1100-SDAG-NEXT: global_store_b64 v3, v[0:1], s[0:1] ; GFX1100-SDAG-NEXT: s_endpgm @@ -742,49 +753,51 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s11, 0x3284fbcf +; SI-SDAG-NEXT: s_mov_b32 s12, 0x7f800000 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s9, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s9, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s9, 0x3e9a209a -; SI-SDAG-NEXT: s_mov_b32 s12, 0x7f800000 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v1 -; SI-SDAG-NEXT: v_fma_f32 v4, v1, s9, -v3 -; SI-SDAG-NEXT: v_fma_f32 v4, v1, s11, 
v4 -; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 +; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s9, -v2 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s11, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s12 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v2, s[0:1] -; SI-SDAG-NEXT: v_mul_f32_e32 v3, s8, v3 -; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 -; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x411a209b -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, s8, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x411a209b +; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v3, vcc ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5 -; SI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v3 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s10, v0 -; SI-SDAG-NEXT: v_fma_f32 v6, v3, s9, -v5 -; SI-SDAG-NEXT: v_log_f32_e32 v2, v0 -; SI-SDAG-NEXT: v_fma_f32 v6, v3, s11, v6 -; SI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s12 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v3, v5, s[2:3] -; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v3 -; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v2 -; SI-SDAG-NEXT: v_fma_f32 v5, v2, s9, -v3 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 +; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-SDAG-NEXT: v_fma_f32 v5, v2, s9, -v4 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 ; SI-SDAG-NEXT: v_fma_f32 v5, v2, s11, v5 -; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 -; SI-SDAG-NEXT: 
v_cmp_lt_f32_e64 s[0:1], |v2|, s12 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc -; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, s10, v0 +; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 +; SI-SDAG-NEXT: v_log_f32_e32 v5, v0 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v2|, s12 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[2:3] +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v3, s[0:1] +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v5 +; SI-SDAG-NEXT: v_fma_f32 v4, v5, s9, -v2 +; SI-SDAG-NEXT: v_fma_f32 v4, v5, s11, v4 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v5|, s12 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[0:1] +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v3 ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -847,47 +860,49 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v2, s10, v2 -; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 -; 
VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s6 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s10, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v5, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s6 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] -; VI-SDAG-NEXT: v_mul_f32_e32 v3, s9, v3 -; VI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v2, s9, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v3, v2 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x411a209b -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5 -; VI-SDAG-NEXT: v_and_b32_e32 v5, 0xfffff000, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s8, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v6, v3, v5 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v5 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0 -; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v1, v2 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v3 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v5, v3, v1 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; 
VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, s8, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5 -; VI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v6, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v5 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s6 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v5, s[2:3] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[2:3] ; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v4, s[0:1] ; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v3 ; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v0 @@ -978,39 +993,41 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX900-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s10, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s10, v2 -; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s10, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX900-SDAG-NEXT: s_mov_b32 s10, 0x7f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x411a209b ; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s4, -v4 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s5, v5 -; 
GFX900-SDAG-NEXT: v_add_f32_e32 v4, v4, v5 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s10 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[0:1] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s4, -v2 +; GFX900-SDAG-NEXT: v_fma_f32 v4, v1, s5, v4 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v4 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s10 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s9, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, v1, s[0:1] -; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s9, v4 -; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x411a209b -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, s9, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s8, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v6 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v4 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s4, -v6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v1, v2 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v4 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, s8, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v6, v4, s4, -v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v7, v4, s5, v7 -; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v6, v7 +; GFX900-SDAG-NEXT: v_fma_f32 v6, v4, s5, v6 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v6 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s10 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v4, v6, s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[2:3] 
; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] ; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 ; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0 @@ -1079,48 +1096,54 @@ define amdgpu_kernel void @s_log10_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s1 -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s3 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s6 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s7 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s7 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s3 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s6 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x411a209b, s3 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v10, 0, 0x411a209b, s6 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s2, v0 :: v_dual_mul_f32 v1, s1, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v2, s0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v2 :: v_dual_lshlrev_b32 v0, 5, v0 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, s2, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_fma_f32 v8, 0x3e9a209a, v2, -v5 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v2, s0, v2 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 -; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v8, 0x3284fbcf, v2 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v5, v5, v8 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v0 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v1 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v6, 0x3e9a209a, v0, -v3 +; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v6, 0x3284fbcf, v0 :: v_dual_lshlrev_b32 v1, 5, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v1, s1, v1 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v3, v3, v6 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 0x411a209b, s7 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v2 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v1 ; GFX1100-SDAG-NEXT: v_fma_f32 v7, 0x3e9a209a, v1, -v4 -; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v6, 0x3284fbcf, v0 -; GFX1100-SDAG-NEXT: v_fma_f32 v8, 0x3e9a209a, v2, -v5 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v7, 0x3284fbcf, v1 -; GFX1100-SDAG-NEXT: v_add_f32_e32 v3, v3, v6 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 0x411a209b, s7 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v8, 0x3284fbcf, v2 :: v_dual_cndmask_b32 v1, v1, v4 -; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v8 :: v_dual_mov_b32 v4, 0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc_lo ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_sub_f32_e32 v1, v1, v10 +; GFX1100-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_sub_f32 v1, v1, v10 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc_lo ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v2, v0, v9 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -1343,60 +1366,63 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xd ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: s_mov_b32 s12, 0x3284fbcf +; SI-SDAG-NEXT: s_mov_b32 s13, 0x7f800000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v2, s11, v2 -; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: 
v_ldexp_f32_e32 v1, s11, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s11, 0x3e9a209a -; SI-SDAG-NEXT: s_mov_b32 s13, 0x7f800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v5, 0x411a209b -; SI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v2 -; SI-SDAG-NEXT: v_fma_f32 v4, v2, s11, -v3 -; SI-SDAG-NEXT: v_fma_f32 v4, v2, s12, v4 -; SI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s13 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; SI-SDAG-NEXT: v_mov_b32_e32 v4, 0x411a209b +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s11, -v2 +; SI-SDAG-NEXT: v_fma_f32 v3, v1, s12, v3 +; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] -; SI-SDAG-NEXT: v_mul_f32_e32 v3, s10, v3 -; SI-SDAG-NEXT: v_log_f32_e32 v4, v3 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v2, s10, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v3 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 -; SI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v4 -; SI-SDAG-NEXT: v_fma_f32 v6, v4, s11, -v2 -; SI-SDAG-NEXT: v_fma_f32 v6, v4, s12, v6 -; SI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v6, s9, v6 -; SI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s13 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3] -; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v2 +; SI-SDAG-NEXT: v_fma_f32 v5, v2, 
s11, -v1 +; SI-SDAG-NEXT: v_fma_f32 v5, v2, s12, v5 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v5 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v5, s9, v5 +; SI-SDAG-NEXT: v_log_f32_e32 v5, v5 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v2|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[2:3] +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1] ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; SI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v6 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0 -; SI-SDAG-NEXT: v_fma_f32 v7, v6, s11, -v4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v1, v2 +; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v5 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, s8, v0 +; SI-SDAG-NEXT: v_fma_f32 v6, v5, s11, -v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; SI-SDAG-NEXT: v_fma_f32 v7, v6, s12, v7 -; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 -; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s13 -; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3] -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a209a, v0 -; SI-SDAG-NEXT: v_fma_f32 v6, v0, s11, -v4 +; SI-SDAG-NEXT: v_fma_f32 v6, v5, s12, v6 +; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v6 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, s13 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[2:3] +; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5 +; SI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v0 +; SI-SDAG-NEXT: v_fma_f32 v6, v0, s11, -v5 ; SI-SDAG-NEXT: v_fma_f32 v6, v0, s12, v6 -; SI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 +; SI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s13 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, 
vcc -; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] -; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] ; SI-SDAG-NEXT: s_mov_b32 s6, -1 ; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4 ; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 @@ -1470,78 +1496,81 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: s_mov_b32 s6, 0x7f800000 ; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v2, s11, v2 -; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 -; VI-SDAG-NEXT: v_and_b32_e32 v3, 0xfffff000, v2 -; VI-SDAG-NEXT: v_sub_f32_e32 v4, v2, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v3 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v5, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a2000, v3 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v6, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v3, v3, v4 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s6 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s11, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v1, v1 +; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v3, 0x369a84fb, v3 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v4, v3 +; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 +; VI-SDAG-NEXT: v_add_f32_e32 v3, v5, v3 +; VI-SDAG-NEXT: 
v_add_f32_e32 v2, v2, v3 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s6 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] -; VI-SDAG-NEXT: v_mul_f32_e32 v3, s10, v3 -; VI-SDAG-NEXT: v_log_f32_e32 v4, v3 -; VI-SDAG-NEXT: v_mov_b32_e32 v5, 0x411a209b -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3 -; VI-SDAG-NEXT: v_and_b32_e32 v2, 0xfffff000, v4 -; VI-SDAG-NEXT: v_sub_f32_e32 v6, v4, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v2, s10, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v2, v2 +; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x411a209b +; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v4, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v1, v3 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v2 +; VI-SDAG-NEXT: v_sub_f32_e32 v5, v2, v1 +; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a2000, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x369a84fb, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v7, v5 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v6, v5 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; VI-SDAG-NEXT: v_ldexp_f32 v5, s9, v5 +; VI-SDAG-NEXT: v_log_f32_e32 v5, v5 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v2|, s6 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[2:3] +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v4, s[0:1] +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v1, v2 +; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; VI-SDAG-NEXT: v_sub_f32_e32 v6, v5, v1 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v6 ; VI-SDAG-NEXT: 
v_mul_f32_e32 v6, 0x369a84fb, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v2 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, s8, v0 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 -; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a2000, v2 -; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 -; VI-SDAG-NEXT: v_add_f32_e32 v2, v2, v6 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v6, s9, v6 -; VI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v4|, s6 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[2:3] -; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v2, v4 -; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v6 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v7, v6, v4 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x3e9a2000, v7 -; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x369a84fb, v7 -; VI-SDAG-NEXT: v_mul_f32_e32 v9, 0x369a84fb, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0 -; VI-SDAG-NEXT: v_add_f32_e32 v7, v9, v7 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 -; VI-SDAG-NEXT: v_add_f32_e32 v7, v8, v7 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v7 -; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s6 -; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v4, s[2:3] -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v5, vcc -; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v4 -; VI-SDAG-NEXT: v_and_b32_e32 v4, 0xfffff000, v0 -; VI-SDAG-NEXT: v_sub_f32_e32 v6, v0, v4 +; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a2000, v1 +; VI-SDAG-NEXT: v_add_f32_e32 v1, v1, v6 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, s6 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[2:3] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v4, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5 +; VI-SDAG-NEXT: v_and_b32_e32 v5, 0xfffff000, v0 +; 
VI-SDAG-NEXT: v_sub_f32_e32 v6, v0, v5 ; VI-SDAG-NEXT: v_mul_f32_e32 v7, 0x3e9a2000, v6 ; VI-SDAG-NEXT: v_mul_f32_e32 v6, 0x369a84fb, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v4 +; VI-SDAG-NEXT: v_mul_f32_e32 v8, 0x369a84fb, v5 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v8, v6 ; VI-SDAG-NEXT: v_add_f32_e32 v6, v7, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, 0x3e9a2000, v4 -; VI-SDAG-NEXT: v_add_f32_e32 v4, v4, v6 +; VI-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a2000, v5 +; VI-SDAG-NEXT: v_add_f32_e32 v5, v5, v6 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v5, s[0:1] +; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[0:1] ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v4 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s4 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s5 @@ -1635,60 +1664,63 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x34 ; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a +; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s11, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, s11, v2 -; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2 -; GFX900-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s11, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX900-SDAG-NEXT: s_mov_b32 s11, 0x7f800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v6, 0x411a209b -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, 0x3e9a209a, v2 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s4, -v3 -; GFX900-SDAG-NEXT: v_fma_f32 v5, v2, s5, v5 -; 
GFX900-SDAG-NEXT: v_add_f32_e32 v3, v3, v5 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v2|, s11 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] +; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0x411a209b +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v1 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s4, -v2 +; GFX900-SDAG-NEXT: v_fma_f32 v3, v1, s5, v3 +; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v3 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], |v1|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, v1, s[0:1] -; GFX900-SDAG-NEXT: v_mul_f32_e32 v3, s10, v3 -; GFX900-SDAG-NEXT: v_log_f32_e32 v5, v3 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v2, v3 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1] +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v2, s10, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v1, v3 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s9, v0 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v2, 0x3e9a209a, v5 -; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s4, -v2 -; GFX900-SDAG-NEXT: v_fma_f32 v7, v5, s5, v7 -; GFX900-SDAG-NEXT: v_add_f32_e32 v2, v2, v7 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s9, v7 -; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v7 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v5|, s11 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[2:3] -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v6, s[0:1] +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v2 +; GFX900-SDAG-NEXT: v_fma_f32 v6, v2, s4, -v1 +; GFX900-SDAG-NEXT: v_fma_f32 v6, v2, s5, v6 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v6 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v6, 5, v6 
+; GFX900-SDAG-NEXT: v_ldexp_f32 v6, s9, v6 +; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v2|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, v5, s[0:1] ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[0:1], s8, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, v1, s[0:1] -; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v2, v5 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v7 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, s8, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s4, -v5 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v1, v2 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v6 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, s8, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v6, s4, -v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v8, v7, s5, v8 -; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v8 -; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v7|, s11 -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v7, v5, s[2:3] -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v6, vcc -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v5 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, 0x3e9a209a, v0 -; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s4, -v5 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v6, s5, v7 +; GFX900-SDAG-NEXT: v_add_f32_e32 v1, v1, v7 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], |v6|, s11 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, v6, v1, s[2:3] +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v5, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v1, v6 +; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v0 +; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s4, -v6 ; GFX900-SDAG-NEXT: v_fma_f32 v7, v0, s5, v7 -; GFX900-SDAG-NEXT: v_add_f32_e32 v5, v5, v7 +; GFX900-SDAG-NEXT: v_add_f32_e32 v6, v6, v7 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s11 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 
0, v6, s[0:1] -; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[0:1] ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v5 ; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GFX900-SDAG-NEXT: s_endpgm @@ -1760,56 +1792,65 @@ define amdgpu_kernel void @s_log10_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s3 -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s1 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s0 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s3 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x4f800000, s6 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s7 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s8 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s9 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s8 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s9 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s6 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s7 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x411a209b, s6 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v0, s3, v0 :: v_dual_mul_f32 v1, s2, v1 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s1, v2 :: v_dual_mul_f32 v3, s0, v3 -; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: 
v_log_f32_e32 v0, v0 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(TRANS32_DEP_3) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 -; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 0x411a209b, s7 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v14, 0, 0x411a209b, s8 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v15, 0, 0x411a209b, s9 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v0 :: v_dual_mul_f32 v6, 0x3e9a209a, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v2, s1, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v3, s0, v3 +; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v7, 0x3e9a209a, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, s3, v0 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v8, 0x3e9a209a, v3 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1100-SDAG-NEXT: v_fma_f32 v12, 0x3e9a209a, v2, -v7 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_fma_f32 v13, 0x3e9a209a, v3, -v8 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v12, 0x3284fbcf, v2 :: v_dual_fmac_f32 v13, 0x3284fbcf, v3 +; GFX1100-SDAG-NEXT: v_add_f32_e32 v7, v7, v12 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: 
v_dual_mul_f32 v7, 0x3e9a209a, v2 :: v_dual_mul_f32 v8, 0x3e9a209a, v3 +; GFX1100-SDAG-NEXT: v_dual_mul_f32 v5, 0x3e9a209a, v0 :: v_dual_add_f32 v8, v8, v13 +; GFX1100-SDAG-NEXT: v_mul_f32_e32 v6, 0x3e9a209a, v1 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1100-SDAG-NEXT: v_fma_f32 v10, 0x3e9a209a, v0, -v5 ; GFX1100-SDAG-NEXT: v_fma_f32 v11, 0x3e9a209a, v1, -v6 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_fma_f32 v12, 0x3e9a209a, v2, -v7 -; GFX1100-SDAG-NEXT: v_fma_f32 v13, 0x3e9a209a, v3, -v8 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v10, 0x3284fbcf, v0 :: v_dual_fmac_f32 v11, 0x3284fbcf, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_dual_fmac_f32 v12, 0x3284fbcf, v2 :: v_dual_fmac_f32 v13, 0x3284fbcf, v3 ; GFX1100-SDAG-NEXT: v_dual_add_f32 v5, v5, v10 :: v_dual_add_f32 v6, v6, v11 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_dual_add_f32 v7, v7, v12 :: v_dual_add_f32 v8, v8, v13 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc_lo ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v1| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc_lo ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v2| ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v5, v2, v7, vcc_lo ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v3| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: 
v_dual_mov_b32 v7, 0 :: v_dual_sub_f32 v2, v1, v9 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v6, v3, v8, vcc_lo ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v3, v0, v4 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v5, v14 :: v_dual_sub_f32 v0, v6, v15 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-SDAG-NEXT: global_store_b128 v7, v[0:3], s[0:1] @@ -2062,10 +2103,10 @@ define float @v_log10_f32(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -2108,10 +2149,10 @@ define float @v_log10_f32(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -2160,10 +2201,10 @@ define float @v_log10_f32(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; 
GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -2206,21 +2247,22 @@ define float @v_log10_f32(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2264,10 +2306,10 @@ define float 
@v_log10_fabs_f32(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, |v0|, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -2310,10 +2352,10 @@ define float @v_log10_fabs_f32(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -2362,10 +2404,10 @@ define float @v_log10_fabs_f32(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -2409,20 +2451,22 @@ define float @v_log10_fabs_f32(float %in) { ; 
GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2467,10 +2511,10 @@ define float @v_log10_fneg_fabs_f32(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; 
SI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, -|v0|, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -2513,10 +2557,10 @@ define float @v_log10_fneg_fabs_f32(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -2565,10 +2609,10 @@ define float @v_log10_fneg_fabs_f32(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -2612,20 +2656,22 @@ define float @v_log10_fneg_fabs_f32(float %in) { ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 
-; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0| +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, s0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2671,10 +2717,10 @@ define float @v_log10_fneg_f32(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, -v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: 
v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -2717,10 +2763,10 @@ define float @v_log10_fneg_f32(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, -v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -2769,10 +2815,10 @@ define float @v_log10_fneg_f32(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, -v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -2815,21 +2861,22 @@ define float @v_log10_fneg_f32(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, -v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) 
| instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -2875,9 +2922,9 @@ define float @v_log10_f32_fast(float %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -2902,9 +2949,9 @@ define float @v_log10_f32_fast(float %in) { ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; 
VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -2929,9 +2976,9 @@ define float @v_log10_f32_fast(float %in) { ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -2955,10 +3002,12 @@ define float @v_log10_f32_fast(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 @@ -2993,9 +3042,9 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; 
SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3020,9 +3069,9 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3047,9 +3096,9 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3073,10 +3122,12 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 
0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 @@ -3111,9 +3162,9 @@ define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3138,9 +3189,9 @@ define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3165,9 +3216,9 @@ define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; GFX900-SDAG-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3191,10 +3242,12 @@ define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 @@ -3228,10 +3281,10 @@ define float @v_log10_f32_ninf(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; 
SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -3274,10 +3327,10 @@ define float @v_log10_f32_ninf(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -3326,10 +3379,10 @@ define float @v_log10_f32_ninf(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -3372,21 +3425,22 @@ define float @v_log10_f32_ninf(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -3431,9 +3485,9 @@ define float @v_log10_f32_afn(float %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3458,9 +3512,9 @@ define float @v_log10_f32_afn(float %in) { ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; 
VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3485,9 +3539,9 @@ define float @v_log10_f32_afn(float %in) { ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3511,10 +3565,12 @@ define float @v_log10_f32_afn(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 @@ -3578,9 +3634,9 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_mov_b32_e32 
v2, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3605,9 +3661,9 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 { ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3632,9 +3688,9 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 { ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3658,10 +3714,12 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 
v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 @@ -3696,9 +3754,9 @@ define float @v_fabs_log10_f32_afn(float %in) { ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; SI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, |v0|, v2 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3723,9 +3781,9 @@ define float @v_fabs_log10_f32_afn(float %in) { ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3750,9 +3808,9 @@ define float @v_fabs_log10_f32_afn(float %in) { ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; 
GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0xc11a209b ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -3777,10 +3835,11 @@ define float @v_fabs_log10_f32_afn(float %in) { ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1 @@ -3956,10 +4015,10 @@ define float @v_log10_f32_nnan(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -4002,10 +4061,10 @@ 
define float @v_log10_f32_nnan(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -4054,10 +4113,10 @@ define float @v_log10_f32_nnan(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -4100,21 +4159,22 @@ define float @v_log10_f32_nnan(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; 
GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -4298,10 +4358,10 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -4344,10 +4404,10 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; 
VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -4396,10 +4456,10 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -4442,21 +4502,22 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; 
GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -4640,10 +4701,10 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -4686,10 +4747,10 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -4738,10 +4799,10 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; 
GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -4784,21 +4845,22 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 
0x411a209b, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -4842,10 +4904,10 @@ define float @v_log10_f32_nnan_ninf(float %in) { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -4882,10 +4944,10 @@ define float @v_log10_f32_nnan_ninf(float %in) { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -4928,10 +4990,10 @@ define float @v_log10_f32_nnan_ninf(float %in) { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; 
GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -4968,18 +5030,20 @@ define float @v_log10_f32_nnan_ninf(float %in) { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3284fbcf, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -5123,10 +5187,10 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: 
v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -5163,10 +5227,10 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 ; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -5209,10 +5273,10 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -5249,18 +5313,20 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3284fbcf, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -5330,10 +5396,10 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { ; SI-SDAG: ; %bb.0: ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -5376,10 +5442,10 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { ; VI-SDAG: ; %bb.0: ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; 
VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; VI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-SDAG-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -5428,10 +5494,10 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { ; GFX900-SDAG: ; %bb.0: ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -5474,21 +5540,22 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 { ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v1 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 ; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] ; @@ -6007,17 +6074,17 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: s_mov_b32 s5, 0x3284fbcf ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, -v1 -; SI-SDAG-NEXT: s_mov_b32 s4, 0x3284fbcf -; SI-SDAG-NEXT: v_fma_f32 v2, v0, s4, v2 +; SI-SDAG-NEXT: v_fma_f32 v2, v0, s5, v2 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 ; SI-SDAG-NEXT: v_add_f32_e32 v1, v1, v2 ; SI-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 @@ -6179,10 +6246,10 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) { ; SI: ; %bb.0: ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, 0x800000 -; SI-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 
-; SI-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-NEXT: v_log_f32_e32 v0, v0 ; SI-NEXT: s_mov_b32 s4, 0x3e9a209a ; SI-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -6203,10 +6270,10 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) { ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; VI-NEXT: s_mov_b32 s4, 0x800000 -; VI-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-NEXT: v_log_f32_e32 v0, v0 ; VI-NEXT: s_mov_b32 s4, 0x7f800000 ; VI-NEXT: v_and_b32_e32 v1, 0xfffff000, v0 @@ -6230,10 +6297,10 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) { ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x800000 -; GFX900-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX900-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX900-NEXT: v_log_f32_e32 v0, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x3e9a209a ; GFX900-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 @@ -6255,20 +6322,22 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) { ; GFX1100-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, vcc_lo -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(SKIP_3) | instid1(VALU_DEP_2) +; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX1100-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f32 v0, v0, v1 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0 ; GFX1100-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0| +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_add_f32_e32 v1, v1, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX1100-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0 ; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX1100-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll index c5dea7fd8b4b1..2c5a9f58a199e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll @@ -22,9 +22,9 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) { ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s2, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s2, -1 ; SI-SDAG-NEXT: v_sub_f32_e32 
v0, v1, v0 @@ -59,9 +59,9 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) { ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 ; VI-SDAG-NEXT: v_log_f32_e32 v1, v1 ; VI-SDAG-NEXT: v_sub_f32_e32 v2, v1, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 @@ -98,9 +98,9 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) { ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 ; GFX900-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 ; GFX900-SDAG-NEXT: global_store_dword v2, v0, s[0:1] @@ -132,11 +132,12 @@ define amdgpu_kernel void @s_log2_f32(ptr addrspace(1) %out, float %in) { ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, s2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s0 ; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v1, v0 @@ -215,24 +216,25 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s6, -1 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; SI-SDAG-NEXT: v_log_f32_e32 v3, v1 -; SI-SDAG-NEXT: s_mov_b32 s6, -1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v3, s3, v3 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s2, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v4, v1 ; SI-SDAG-NEXT: s_mov_b32 s4, s0 ; SI-SDAG-NEXT: s_mov_b32 s5, s1 -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v2 -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v3, v0 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v3, v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; @@ -265,21 +267,22 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; 
VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; VI-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; VI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_log_f32_e32 v2, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v3 -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v3, s3, v3 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v4, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v3, v2 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s1 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v2, s0 ; VI-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-SDAG-NEXT: s_endpgm @@ -313,21 +316,22 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX900-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v5, 0 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v2, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 ; 
GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v2, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s2, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4 -; GFX900-SDAG-NEXT: v_log_f32_e32 v2, v1 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v4, v3 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v2, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v3, s3, v3 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s2, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v3, v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v4, v0 ; GFX900-SDAG-NEXT: global_store_dwordx2 v5, v[0:1], s[0:1] ; GFX900-SDAG-NEXT: s_endpgm ; @@ -362,13 +366,16 @@ define amdgpu_kernel void @s_log2_v2f32(ptr addrspace(1) %out, <2 x float> %in) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s4, 0x800000, s3 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s5, 0x800000, s2 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x4f800000, s4 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s5 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s5 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s4 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 0x42000000, s5 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, s3, v1 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v3, s2, v3 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v1, s3, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 
v3, s2, v3 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v1, v1 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v3, v3 @@ -469,28 +476,30 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; SI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x4f800000 +; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v3, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 1.0, v3, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v4, s1, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v0, s2, v0 -; SI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v6, s0, v6 -; SI-SDAG-NEXT: v_log_f32_e32 v3, v0 -; SI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v1, vcc -; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v3, s1, v3 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, s2, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v5, s0, v5 +; SI-SDAG-NEXT: v_log_f32_e32 v7, v0 +; SI-SDAG-NEXT: v_log_f32_e32 v5, v5 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc ; SI-SDAG-NEXT: s_mov_b32 s6, -1 -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v4, v2 -; SI-SDAG-NEXT: 
v_sub_f32_e32 v2, v3, v7 -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v6, v5 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v3, v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v7, v6 +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v5, v4 ; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 ; SI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm @@ -533,27 +542,29 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; VI-SDAG-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; VI-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, s0, v1 -; VI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, s1, v6 -; VI-SDAG-NEXT: v_log_f32_e32 v3, v1 -; VI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v4, v2 -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v3, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v3, s2, v3 +; VI-SDAG-NEXT: v_ldexp_f32 v5, s1, v5 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; VI-SDAG-NEXT: v_log_f32_e32 v5, v5 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v6, v1 +; 
VI-SDAG-NEXT: v_sub_f32_e32 v2, v3, v2 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v5, v4 ; VI-SDAG-NEXT: v_mov_b32_e32 v3, s4 -; VI-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v6, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s5 ; VI-SDAG-NEXT: flat_store_dwordx3 v[3:4], v[0:2] ; VI-SDAG-NEXT: s_endpgm @@ -595,27 +606,29 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x4f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v3, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v4, s2, v4 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v6, s1, v6 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s0, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v4, v4 -; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6 -; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v1 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v7, 0 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v4, v2 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v6, v5 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v3, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: 
v_ldexp_f32 v3, s2, v3 +; GFX900-SDAG-NEXT: v_ldexp_f32 v5, s1, v5 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX900-SDAG-NEXT: v_log_f32_e32 v5, v5 +; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v3, v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v5, v4 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v6, v0 ; GFX900-SDAG-NEXT: global_store_dwordx3 v7, v[0:2], s[6:7] ; GFX900-SDAG-NEXT: s_endpgm ; @@ -654,28 +667,35 @@ define amdgpu_kernel void @s_log2_v3f32(ptr addrspace(1) %out, <3 x float> %in) ; GFX1100-SDAG-NEXT: s_clause 0x1 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX1100-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v6, 0 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s1 +; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s3, 0x800000, s2 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s3 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 1.0, 0x4f800000, s6 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 1.0, 0x4f800000, s7 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s6 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s3 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s7 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s6 -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v2, s2, v2 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v4, s1, v4 :: v_dual_mul_f32 v5, s0, v5 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s3 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v4, 5, v4 ; GFX1100-SDAG-NEXT: 
v_cndmask_b32_e64 v3, 0, 0x42000000, s7 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v4, s1, v4 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v5, s0, v5 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v4, v4 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v5, v5 -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v6, 0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v2, v2, v0 :: v_dual_sub_f32 v1, v4, v1 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v1, v4, v1 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v2, s2, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v2, v2, v0 ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v5, v3 ; GFX1100-SDAG-NEXT: global_store_b96 v6, v[0:2], s[4:5] ; GFX1100-SDAG-NEXT: s_endpgm @@ -806,34 +826,37 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0xd ; SI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x4f800000 ; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v3, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 
v5, 0, 1, vcc ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s5, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v8, 1.0, v3, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc +; SI-SDAG-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v4, s7, v4 -; SI-SDAG-NEXT: v_mul_f32_e32 v6, s6, v6 -; SI-SDAG-NEXT: v_mul_f32_e32 v8, s5, v8 -; SI-SDAG-NEXT: v_mul_f32_e32 v1, s4, v1 -; SI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; SI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; SI-SDAG-NEXT: v_log_f32_e32 v8, v8 -; SI-SDAG-NEXT: v_log_f32_e32 v9, v1 -; SI-SDAG-NEXT: s_mov_b32 s2, -1 -; SI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v2 -; SI-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5 -; SI-SDAG-NEXT: v_sub_f32_e32 v1, v8, v7 -; SI-SDAG-NEXT: v_sub_f32_e32 v0, v9, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v7, 5, v7 +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v3, s7, v3 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v5, s6, v5 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v7, s5, v7 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v1, s4, v1 +; SI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; SI-SDAG-NEXT: v_log_f32_e32 v5, v5 +; SI-SDAG-NEXT: v_log_f32_e32 v7, v7 +; SI-SDAG-NEXT: v_log_f32_e32 v8, v1 +; SI-SDAG-NEXT: v_sub_f32_e32 v3, v3, v2 +; SI-SDAG-NEXT: v_sub_f32_e32 v2, v5, v4 +; SI-SDAG-NEXT: v_sub_f32_e32 v1, v7, v6 +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v8, v0 ; SI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; SI-SDAG-NEXT: s_endpgm ; @@ -880,33 +903,36 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 ; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; VI-SDAG-NEXT: 
v_mov_b32_e32 v3, 0x4f800000 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 1.0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 1.0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v1, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 -; VI-SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v8, 1.0, v3, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc +; VI-SDAG-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc ; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 -; VI-SDAG-NEXT: v_mul_f32_e32 v4, s3, v4 -; VI-SDAG-NEXT: v_mul_f32_e32 v6, s2, v6 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v5, 5, v5 ; VI-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; VI-SDAG-NEXT: v_log_f32_e32 v4, v4 -; VI-SDAG-NEXT: v_log_f32_e32 v6, v6 -; VI-SDAG-NEXT: v_mul_f32_e32 v8, s1, v8 -; VI-SDAG-NEXT: v_mul_f32_e32 v1, s0, v1 -; VI-SDAG-NEXT: v_log_f32_e32 v8, v8 -; VI-SDAG-NEXT: v_log_f32_e32 v9, v1 -; VI-SDAG-NEXT: v_sub_f32_e32 v3, v4, v2 -; VI-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_ldexp_f32 v3, s3, v3 +; VI-SDAG-NEXT: v_ldexp_f32 v5, s2, v5 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v7, 5, v7 +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v3, v3 +; VI-SDAG-NEXT: v_log_f32_e32 v5, v5 +; VI-SDAG-NEXT: v_ldexp_f32 v7, s1, v7 +; VI-SDAG-NEXT: v_ldexp_f32 v1, s0, v1 +; VI-SDAG-NEXT: v_log_f32_e32 v7, v7 +; VI-SDAG-NEXT: v_log_f32_e32 v8, v1 +; VI-SDAG-NEXT: v_sub_f32_e32 v3, v3, v2 +; VI-SDAG-NEXT: v_sub_f32_e32 v2, v5, v4 ; VI-SDAG-NEXT: v_mov_b32_e32 v4, s4 -; VI-SDAG-NEXT: v_sub_f32_e32 v1, 
v8, v7 -; VI-SDAG-NEXT: v_sub_f32_e32 v0, v9, v0 +; VI-SDAG-NEXT: v_sub_f32_e32 v1, v7, v6 +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v8, v0 ; VI-SDAG-NEXT: v_mov_b32_e32 v5, s5 ; VI-SDAG-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-SDAG-NEXT: s_endpgm @@ -954,33 +980,36 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX900-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v0, 0x800000 ; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v3, 0x4f800000 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 ; GFX900-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 1.0, v3, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v6, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 1.0, v3, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v5, 0, v1, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s1, v0 -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v8, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v9, 1.0, v3, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v7, 0, v1, vcc +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 ; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc -; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v3, vcc -; GFX900-SDAG-NEXT: v_mul_f32_e32 v5, s3, v5 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v7, s2, v7 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v9, s1, v9 -; GFX900-SDAG-NEXT: v_mul_f32_e32 v1, s0, v1 -; GFX900-SDAG-NEXT: v_log_f32_e32 v5, v5 -; GFX900-SDAG-NEXT: v_log_f32_e32 v7, v7 -; GFX900-SDAG-NEXT: v_log_f32_e32 v9, v9 -; GFX900-SDAG-NEXT: v_log_f32_e32 v10, v1 -; GFX900-SDAG-NEXT: v_mov_b32_e32 v4, 0 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v5, v2 -; GFX900-SDAG-NEXT: 
v_sub_f32_e32 v2, v7, v6 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v9, v8 -; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v10, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v6, 5, v6 +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v8, 5, v8 +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; GFX900-SDAG-NEXT: v_ldexp_f32 v3, s3, v3 +; GFX900-SDAG-NEXT: v_ldexp_f32 v6, s2, v6 +; GFX900-SDAG-NEXT: v_ldexp_f32 v8, s1, v8 +; GFX900-SDAG-NEXT: v_ldexp_f32 v1, s0, v1 +; GFX900-SDAG-NEXT: v_log_f32_e32 v3, v3 +; GFX900-SDAG-NEXT: v_log_f32_e32 v6, v6 +; GFX900-SDAG-NEXT: v_log_f32_e32 v8, v8 +; GFX900-SDAG-NEXT: v_log_f32_e32 v9, v1 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v3, v3, v2 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v2, v6, v5 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v1, v8, v7 +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v9, v0 ; GFX900-SDAG-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7] ; GFX900-SDAG-NEXT: s_endpgm ; @@ -1025,34 +1054,42 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in) ; GFX1100-SDAG-NEXT: s_clause 0x1 ; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 ; GFX1100-SDAG-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 +; GFX1100-SDAG-NEXT: v_mov_b32_e32 v9, 0 ; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s6, 0x800000, s3 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s7, 0x800000, s2 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s8, 0x800000, s1 ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s9, 0x800000, s0 ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s6 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x4f800000, s7 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 1.0, 0x4f800000, s8 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v7, 1.0, 0x4f800000, s9 -; GFX1100-SDAG-NEXT: 
v_cndmask_b32_e64 v0, 0, 0x42000000, s6 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v2, s3, v2 :: v_dual_mul_f32 v3, s2, v3 -; GFX1100-SDAG-NEXT: v_dual_mul_f32 v6, s1, v6 :: v_dual_mul_f32 v7, s0, v7 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s6 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s7 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s7 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v6, 0, 1, s8 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v7, 0, 1, s9 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v3, 5, v3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 0x42000000, s6 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s8 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v7, 5, v7 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v2, s3, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v3, s2, v3 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s9 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v7, s0, v7 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v2, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v6, 5, v6 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v8, v3 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(TRANS32_DEP_3) -; GFX1100-SDAG-NEXT: v_log_f32_e32 v6, v6 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(TRANS32_DEP_3) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v7, v7 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 0x42000000, s8 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, 0x42000000, s9 -; GFX1100-SDAG-NEXT: v_mov_b32_e32 v9, 0 -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v3, v2, v0 :: v_dual_sub_f32 v2, v8, v1 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v3, v2, v0 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1100-SDAG-NEXT: 
v_ldexp_f32 v6, s1, v6 +; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v2, v8, v1 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v7, v5 +; GFX1100-SDAG-NEXT: v_log_f32_e32 v6, v6 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff -; GFX1100-SDAG-NEXT: v_dual_sub_f32 v1, v6, v4 :: v_dual_sub_f32 v0, v7, v5 +; GFX1100-SDAG-NEXT: v_sub_f32_e32 v1, v6, v4 ; GFX1100-SDAG-NEXT: global_store_b128 v9, v[0:3], s[4:5] ; GFX1100-SDAG-NEXT: s_endpgm ; @@ -1192,19 +1229,19 @@ define amdgpu_kernel void @s_log2_v4f32(ptr addrspace(1) %out, <4 x float> %in) } define float @v_log2_f32(float %in) { -; GFX689-SDAG-LABEL: v_log2_f32: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32: ; GFX689-GISEL: ; %bb.0: @@ -1220,14 +1257,44 @@ define float @v_log2_f32(float %in) { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32: +; 
VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1260,19 +1327,19 @@ define float @v_log2_f32(float %in) { } define float @v_log2_fabs_f32(float 
%in) { -; GFX689-SDAG-LABEL: v_log2_fabs_f32: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_fabs_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, |v0|, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_fabs_f32: ; GFX689-GISEL: ; %bb.0: @@ -1288,15 +1355,44 @@ define float @v_log2_fabs_f32(float %in) { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_fabs_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_fabs_f32: +; 
GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_fabs_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1331,19 +1427,19 @@ define float @v_log2_fabs_f32(float %in) { } define float @v_log2_fneg_fabs_f32(float %in) { -; GFX689-SDAG-LABEL: v_log2_fneg_fabs_f32: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; 
GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_fneg_fabs_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, -|v0|, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_fneg_fabs_f32: ; GFX689-GISEL: ; %bb.0: @@ -1359,15 +1455,44 @@ define float @v_log2_fneg_fabs_f32(float %in) { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_fneg_fabs_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_fneg_fabs_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: 
v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_fneg_fabs_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e64 s0, 0x80800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -|v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, -|v0|, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1403,19 +1528,19 @@ define float @v_log2_fneg_fabs_f32(float %in) { } define float @v_log2_fneg_f32(float %in) { -; GFX689-SDAG-LABEL: v_log2_fneg_f32: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x80800000 -; GFX689-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_fneg_f32: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; SI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; 
SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, -v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_fneg_f32: ; GFX689-GISEL: ; %bb.0: @@ -1431,14 +1556,44 @@ define float @v_log2_fneg_f32(float %in) { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_fneg_f32: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; VI-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, -v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_fneg_f32: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x80800000 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, -v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_fneg_f32: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_lt_f32_e32 vcc_lo, 0x80800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; 
GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, -v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1473,19 +1628,19 @@ define float @v_log2_fneg_f32(float %in) { } define float @v_log2_f32_fast(float %in) { -; GFX689-SDAG-LABEL: v_log2_f32_fast: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_f32_fast: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_fast: ; GFX689-GISEL: ; %bb.0: @@ -1501,14 +1656,44 @@ define float @v_log2_f32_fast(float %in) { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; 
GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32_fast: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32_fast: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_fast: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1541,19 
+1726,19 @@ define float @v_log2_f32_fast(float %in) { } define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { -; GFX689-SDAG-LABEL: v_log2_f32_unsafe_math_attr: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_f32_unsafe_math_attr: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_unsafe_math_attr: ; GFX689-GISEL: ; %bb.0: @@ -1569,14 +1754,44 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32_unsafe_math_attr: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: 
v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32_unsafe_math_attr: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_unsafe_math_attr: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1609,19 +1824,19 @@ define float @v_log2_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" { } define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" { -; GFX689-SDAG-LABEL: v_log2_f32_approx_fn_attr: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 
-; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_f32_approx_fn_attr: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_approx_fn_attr: ; GFX689-GISEL: ; %bb.0: @@ -1637,14 +1852,44 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32_approx_fn_attr: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32_approx_fn_attr: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, 
s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_approx_fn_attr: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1677,19 +1922,19 @@ define float @v_log2_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" } define float @v_log2_f32_ninf(float %in) { -; GFX689-SDAG-LABEL: v_log2_f32_ninf: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_f32_ninf: +; SI-SDAG: ; 
%bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_ninf: ; GFX689-GISEL: ; %bb.0: @@ -1705,14 +1950,44 @@ define float @v_log2_f32_ninf(float %in) { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_ninf: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1745,19 +2020,19 @@ define float @v_log2_f32_ninf(float %in) { } define float @v_log2_f32_afn(float %in) { -; GFX689-SDAG-LABEL: v_log2_f32_afn: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_f32_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_afn: ; 
GFX689-GISEL: ; %bb.0: @@ -1773,14 +2048,44 @@ define float @v_log2_f32_afn(float %in) { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32_afn: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_afn: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; 
GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1839,19 +2144,19 @@ define float @v_log2_f32_afn_daz(float %in) #0 { } define float @v_log2_f32_afn_dynamic(float %in) #1 { -; GFX689-SDAG-LABEL: v_log2_f32_afn_dynamic: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_f32_afn_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_afn_dynamic: ; GFX689-GISEL: ; %bb.0: @@ -1867,14 +2172,44 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32_afn_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 
v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32_afn_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_afn_dynamic: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -1907,19 +2242,19 @@ define float @v_log2_f32_afn_dynamic(float %in) #1 { } define float @v_fabs_log2_f32_afn(float %in) { -; GFX689-SDAG-LABEL: v_fabs_log2_f32_afn: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 -; 
GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_fabs_log2_f32_afn: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e64 v0, |v0|, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_fabs_log2_f32_afn: ; GFX689-GISEL: ; %bb.0: @@ -1935,15 +2270,44 @@ define float @v_fabs_log2_f32_afn(float %in) { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_fabs_log2_f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_fabs_log2_f32_afn: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 
v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_fabs_log2_f32_afn: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x800000, |v0| ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0 +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, s0 -; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2 -; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, |v0|, v2 ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2004,19 +2368,19 @@ define float @v_log2_f32_daz(float %in) #0 { } define float @v_log2_f32_nnan(float %in) { -; GFX689-SDAG-LABEL: v_log2_f32_nnan: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_f32_nnan: +; SI-SDAG: ; 
%bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_nnan: ; GFX689-GISEL: ; %bb.0: @@ -2032,14 +2396,44 @@ define float @v_log2_f32_nnan(float %in) { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32_nnan: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32_nnan: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_nnan: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2098,19 +2492,19 @@ define float @v_log2_f32_nnan_daz(float %in) #0 { } define float @v_log2_f32_nnan_dynamic(float %in) #1 { -; GFX689-SDAG-LABEL: v_log2_f32_nnan_dynamic: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_f32_nnan_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX689-GISEL-LABEL: v_log2_f32_nnan_dynamic: ; GFX689-GISEL: ; %bb.0: @@ -2126,14 +2520,44 @@ define float @v_log2_f32_nnan_dynamic(float %in) #1 { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32_nnan_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32_nnan_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_nnan_dynamic: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: 
v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2192,19 +2616,19 @@ define float @v_log2_f32_ninf_daz(float %in) #0 { } define float @v_log2_f32_ninf_dynamic(float %in) #1 { -; GFX689-SDAG-LABEL: v_log2_f32_ninf_dynamic: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_f32_ninf_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_ninf_dynamic: ; GFX689-GISEL: ; %bb.0: @@ -2220,14 +2644,44 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32_ninf_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 
0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32_ninf_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_ninf_dynamic: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2260,19 +2714,19 @@ define float @v_log2_f32_ninf_dynamic(float %in) #1 { } define float @v_log2_f32_nnan_ninf(float %in) { -; GFX689-SDAG-LABEL: v_log2_f32_nnan_ninf: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_f32_nnan_ninf: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_nnan_ninf: ; GFX689-GISEL: ; %bb.0: @@ -2288,14 +2742,44 @@ define float @v_log2_f32_nnan_ninf(float %in) { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32_nnan_ninf: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32_nnan_ninf: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; 
GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_nnan_ninf: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2354,19 +2838,19 @@ define float @v_log2_f32_nnan_ninf_daz(float %in) #0 { } define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 { -; GFX689-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; 
SI-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_nnan_ninf_dynamic: ; GFX689-GISEL: ; %bb.0: @@ -2382,14 +2866,44 @@ define float @v_log2_f32_nnan_ninf_dynamic(float %in) #1 { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; 
GFX1100-SDAG-LABEL: v_log2_f32_nnan_ninf_dynamic: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2448,19 +2962,19 @@ define float @v_log2_f32_fast_daz(float %in) #0 { } define float @v_log2_f32_dynamic_mode(float %in) #1 { -; GFX689-SDAG-LABEL: v_log2_f32_dynamic_mode: -; GFX689-SDAG: ; %bb.0: -; GFX689-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX689-SDAG-NEXT: s_mov_b32 s4, 0x800000 -; GFX689-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX689-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX689-SDAG-NEXT: v_log_f32_e32 v0, v0 -; GFX689-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 -; GFX689-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX689-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 -; GFX689-SDAG-NEXT: s_setpc_b64 s[30:31] +; SI-SDAG-LABEL: v_log2_f32_dynamic_mode: +; SI-SDAG: ; %bb.0: +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v2 +; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; SI-SDAG-NEXT: 
v_cndmask_b32_e32 v1, 0, v1, vcc +; SI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX689-GISEL-LABEL: v_log2_f32_dynamic_mode: ; GFX689-GISEL: ; %bb.0: @@ -2476,14 +2990,44 @@ define float @v_log2_f32_dynamic_mode(float %in) #1 { ; GFX689-GISEL-NEXT: v_sub_f32_e32 v0, v0, v1 ; GFX689-GISEL-NEXT: s_setpc_b64 s[30:31] ; +; VI-SDAG-LABEL: v_log2_f32_dynamic_mode: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; VI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; VI-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; VI-SDAG-NEXT: v_log_f32_e32 v0, v0 +; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; VI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; VI-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; VI-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX900-SDAG-LABEL: v_log2_f32_dynamic_mode: +; GFX900-SDAG: ; %bb.0: +; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-SDAG-NEXT: s_mov_b32 s4, 0x800000 +; GFX900-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 +; GFX900-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX900-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX900-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 +; GFX900-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX900-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 +; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31] +; ; GFX1100-SDAG-LABEL: v_log2_f32_dynamic_mode: ; GFX1100-SDAG: ; %bb.0: ; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo ; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; 
GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-SDAG-NEXT: v_ldexp_f32 v0, v0, v2 +; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1 @@ -2649,10 +3193,10 @@ define float @v_log2_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) { ; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 ; SI-SDAG-NEXT: s_mov_b32 s4, 0x800000 ; SI-SDAG-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; SI-SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; SI-SDAG-NEXT: v_mul_f32_e32 v0, v0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 5, v1 +; SI-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; SI-SDAG-NEXT: v_log_f32_e32 v0, v0 ; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0x42000000 ; SI-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -2718,9 +3262,9 @@ define float @v_log2_f32_from_fpext_bf16(bfloat %src) { ; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; SI-NEXT: s_mov_b32 s4, 0x800000 ; SI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; SI-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; SI-NEXT: v_mul_f32_e32 v0, v0, v2 +; SI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; SI-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; SI-NEXT: v_log_f32_e32 v0, v0 ; SI-NEXT: v_mov_b32_e32 v1, 0x42000000 ; SI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -2732,10 +3276,10 @@ define float @v_log2_f32_from_fpext_bf16(bfloat %src) { ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; VI-NEXT: s_mov_b32 s4, 0x800000 -; VI-NEXT: v_mov_b32_e32 v1, 0x4f800000 ; VI-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; VI-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; VI-NEXT: v_mul_f32_e32 v0, v0, v1 +; VI-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-NEXT: 
v_lshlrev_b32_e32 v1, 5, v1 +; VI-NEXT: v_ldexp_f32 v0, v0, v1 ; VI-NEXT: v_log_f32_e32 v0, v0 ; VI-NEXT: v_mov_b32_e32 v1, 0x42000000 ; VI-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -2748,9 +3292,9 @@ define float @v_log2_f32_from_fpext_bf16(bfloat %src) { ; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX900-NEXT: s_mov_b32 s4, 0x800000 ; GFX900-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GFX900-NEXT: v_mov_b32_e32 v2, 0x4f800000 -; GFX900-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GFX900-NEXT: v_mul_f32_e32 v0, v0, v2 +; GFX900-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX900-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX900-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX900-NEXT: v_log_f32_e32 v0, v0 ; GFX900-NEXT: v_mov_b32_e32 v1, 0x42000000 ; GFX900-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -2763,10 +3307,11 @@ define float @v_log2_f32_from_fpext_bf16(bfloat %src) { ; GFX1100-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX1100-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0 -; GFX1100-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, vcc_lo +; GFX1100-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo ; GFX1100-NEXT: v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo -; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v2 -; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1100-NEXT: v_lshlrev_b32_e32 v2, 5, v2 +; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1100-NEXT: v_ldexp_f32 v0, v0, v2 ; GFX1100-NEXT: v_log_f32_e32 v0, v0 ; GFX1100-NEXT: s_waitcnt_depctr 0xfff ; GFX1100-NEXT: v_sub_f32_e32 v0, v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll index 6b097bd71c9f1..ba428df273db5 100644 --- a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll +++ b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll @@ -3,20 +3,32 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck 
-check-prefixes=GFX12,GFX12-GISEL %s define amdgpu_cs float @v_s_exp_f32(float inreg %src) { -; GFX12-LABEL: v_s_exp_f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000 -; GFX12-NEXT: s_cselect_b32 s1, 0x42800000, 0 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) -; GFX12-NEXT: s_add_f32 s0, s0, s1 -; GFX12-NEXT: s_cselect_b32 s1, 0x1f800000, 1.0 -; GFX12-NEXT: v_s_exp_f32 s0, s0 -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) -; GFX12-NEXT: s_mul_f32 s0, s0, s1 -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: v_mov_b32_e32 v0, s0 -; GFX12-NEXT: ; return to shader part epilog +; GFX12-SDAG-LABEL: v_s_exp_f32: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000 +; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42800000, 0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) +; GFX12-SDAG-NEXT: s_add_f32 s0, s0, s1 +; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0xffffffc0, 0 +; GFX12-SDAG-NEXT: v_s_exp_f32 s0, s0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) +; GFX12-SDAG-NEXT: v_ldexp_f32 v0, s0, s1 +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: v_s_exp_f32: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0xc2fc0000 +; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42800000, 0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) +; GFX12-GISEL-NEXT: s_add_f32 s0, s0, s1 +; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x1f800000, 1.0 +; GFX12-GISEL-NEXT: v_s_exp_f32 s0, s0 +; GFX12-GISEL-NEXT: s_wait_alu 0xfffe +; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) +; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1 +; GFX12-GISEL-NEXT: s_wait_alu 0xfffe +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: ; return to shader part epilog %result = call float 
@llvm.exp2.f32(float %src) ret float %result } @@ -55,20 +67,38 @@ define amdgpu_cs half @v_s_amdgcn_exp_f16(half inreg %src) { } define amdgpu_cs float @v_s_log_f32(float inreg %src) { -; GFX12-LABEL: v_s_log_f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_cmp_lt_f32 s0, 0x800000 -; GFX12-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) -; GFX12-NEXT: s_mul_f32 s0, s0, s1 -; GFX12-NEXT: s_cselect_b32 s1, 0x42000000, 0 -; GFX12-NEXT: v_s_log_f32 s0, s0 -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) -; GFX12-NEXT: s_sub_f32 s0, s0, s1 -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: v_mov_b32_e32 v0, s0 -; GFX12-NEXT: ; return to shader part epilog +; GFX12-SDAG-LABEL: v_s_log_f32: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_cmp_lt_f32 s0, 0x800000 +; GFX12-SDAG-NEXT: s_cselect_b32 s1, -1, 0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1 +; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_ldexp_f32 v0, s0, v0 +; GFX12-SDAG-NEXT: s_wait_alu 0xfffe +; GFX12-SDAG-NEXT: s_and_b32 s0, s1, exec_lo +; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x42000000, 0 +; GFX12-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX12-SDAG-NEXT: s_wait_alu 0xfffe +; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) +; GFX12-SDAG-NEXT: v_subrev_f32_e32 v0, s0, v0 +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: v_s_log_f32: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0x800000 +; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) +; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1 +; GFX12-GISEL-NEXT: s_cselect_b32 s1, 
0x42000000, 0 +; GFX12-GISEL-NEXT: v_s_log_f32 s0, s0 +; GFX12-GISEL-NEXT: s_wait_alu 0xfffe +; GFX12-GISEL-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) +; GFX12-GISEL-NEXT: s_sub_f32 s0, s0, s1 +; GFX12-GISEL-NEXT: s_wait_alu 0xfffe +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: ; return to shader part epilog %result = call float @llvm.log2.f32(float %src) ret float %result } @@ -271,22 +301,41 @@ define amdgpu_cs half @v_amdgcn_sqrt_f16(half inreg %src) { } define amdgpu_cs float @srcmods_abs_f32(float inreg %src) { -; GFX12-LABEL: srcmods_abs_f32: -; GFX12: ; %bb.0: -; GFX12-NEXT: s_bitset0_b32 s0, 31 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX12-NEXT: s_cmp_lt_f32 s0, 0x800000 -; GFX12-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0 -; GFX12-NEXT: s_mul_f32 s0, s0, s1 -; GFX12-NEXT: s_cselect_b32 s1, 0x42000000, 0 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1) -; GFX12-NEXT: v_s_log_f32 s0, s0 -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_sub_f32 s0, s0, s1 -; GFX12-NEXT: s_wait_alu 0xfffe -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_2) -; GFX12-NEXT: v_mov_b32_e32 v0, s0 -; GFX12-NEXT: ; return to shader part epilog +; GFX12-SDAG-LABEL: srcmods_abs_f32: +; GFX12-SDAG: ; %bb.0: +; GFX12-SDAG-NEXT: s_and_b32 s1, s0, 0x7fffffff +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_cmp_lt_f32 s1, 0x800000 +; GFX12-SDAG-NEXT: s_cselect_b32 s1, -1, 0 +; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1 +; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; GFX12-SDAG-NEXT: v_ldexp_f32 v0, |s0|, v0 +; GFX12-SDAG-NEXT: s_wait_alu 0xfffe +; GFX12-SDAG-NEXT: s_and_b32 s0, s1, exec_lo +; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x42000000, 0 +; GFX12-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1) +; GFX12-SDAG-NEXT: v_log_f32_e32 v0, v0 +; GFX12-SDAG-NEXT: s_wait_alu 0xfffe +; GFX12-SDAG-NEXT: v_subrev_f32_e32 v0, s0, v0 +; GFX12-SDAG-NEXT: ; return to shader part epilog +; +; GFX12-GISEL-LABEL: srcmods_abs_f32: +; GFX12-GISEL: ; %bb.0: +; GFX12-GISEL-NEXT: s_bitset0_b32 s0, 31 +; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX12-GISEL-NEXT: s_cmp_lt_f32 s0, 0x800000 +; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x4f800000, 1.0 +; GFX12-GISEL-NEXT: s_mul_f32 s0, s0, s1 +; GFX12-GISEL-NEXT: s_cselect_b32 s1, 0x42000000, 0 +; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1) +; GFX12-GISEL-NEXT: v_s_log_f32 s0, s0 +; GFX12-GISEL-NEXT: s_wait_alu 0xfffe +; GFX12-GISEL-NEXT: s_sub_f32 s0, s0, s1 +; GFX12-GISEL-NEXT: s_wait_alu 0xfffe +; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_2) +; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-GISEL-NEXT: ; return to shader part epilog %abs = call float @llvm.fabs.f32(float %src) %result = call float @llvm.log2.f32(float %abs) ret float %result @@ -295,18 +344,20 @@ define amdgpu_cs float @srcmods_abs_f32(float inreg %src) { define amdgpu_cs float @srcmods_neg_f32(float inreg %src) { ; GFX12-SDAG-LABEL: srcmods_neg_f32: ; GFX12-SDAG: ; %bb.0: -; GFX12-SDAG-NEXT: s_xor_b32 s1, s0, 0x80000000 ; GFX12-SDAG-NEXT: s_cmp_gt_f32 s0, 0x80800000 -; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x4f800000, 1.0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) -; GFX12-SDAG-NEXT: s_mul_f32 s0, s1, s0 -; GFX12-SDAG-NEXT: s_cselect_b32 s1, 0x42000000, 0 -; GFX12-SDAG-NEXT: v_s_log_f32 s0, s0 +; GFX12-SDAG-NEXT: s_cselect_b32 s1, -1, 0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s1 +; GFX12-SDAG-NEXT: v_lshlrev_b32_e32 v0, 5, v0 +; 
GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) +; GFX12-SDAG-NEXT: v_ldexp_f32 v0, -s0, v0 ; GFX12-SDAG-NEXT: s_wait_alu 0xfffe -; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) -; GFX12-SDAG-NEXT: s_sub_f32 s0, s0, s1 +; GFX12-SDAG-NEXT: s_and_b32 s0, s1, exec_lo +; GFX12-SDAG-NEXT: s_cselect_b32 s0, 0x42000000, 0 +; GFX12-SDAG-NEXT: v_log_f32_e32 v0, v0 ; GFX12-SDAG-NEXT: s_wait_alu 0xfffe -; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-SDAG-NEXT: s_delay_alu instid0(TRANS32_DEP_1) +; GFX12-SDAG-NEXT: v_subrev_f32_e32 v0, s0, v0 ; GFX12-SDAG-NEXT: ; return to shader part epilog ; ; GFX12-GISEL-LABEL: srcmods_neg_f32: diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f32.ll b/llvm/test/CodeGen/AMDGPU/rsq.f32.ll index f3c9a5c471aca..f4b947ade8dac 100644 --- a/llvm/test/CodeGen/AMDGPU/rsq.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/rsq.f32.ll @@ -2006,14 +2006,12 @@ define float @v_rsq_f32(float %val) { ; GCN-IEEE-SAFE: ; %bb.0: ; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x800000 -; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v1, 0x4b800000 ; GCN-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, 0, 24, vcc +; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GCN-IEEE-SAFE-NEXT: v_rsq_f32_e32 v0, v0 -; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v1, 0x45800000 -; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc +; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GCN-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1 %div = fdiv contract float 1.0, %sqrt, !fpmath !1 @@ -2296,10 +2294,9 @@ define float @v_rsq_f32_contractable_user(float %val0, float 
%val1) { ; GCN-IEEE-SAFE: ; %bb.0: ; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x800000 -; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x4b800000 ; GCN-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GCN-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v2 +; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, 0, 24, vcc +; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GCN-IEEE-SAFE-NEXT: v_rsq_f32_e32 v0, v0 ; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x45800000 ; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc @@ -2331,10 +2328,9 @@ define float @v_rsq_f32_contractable_user_missing_contract0(float %val0, float % ; GCN-IEEE-SAFE: ; %bb.0: ; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x800000 -; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x4b800000 ; GCN-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GCN-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v2 +; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, 0, 24, vcc +; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GCN-IEEE-SAFE-NEXT: v_rsq_f32_e32 v0, v0 ; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x45800000 ; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc @@ -2366,14 +2362,12 @@ define float @v_rsq_f32_contractable_user_missing_contract1(float %val0, float % ; GCN-IEEE-SAFE: ; %bb.0: ; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x800000 -; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x4b800000 ; GCN-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc -; GCN-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v2 +; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, 0, 24, vcc +; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GCN-IEEE-SAFE-NEXT: v_rsq_f32_e32 v0, v0 -; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x45800000 -; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 
v2, 1.0, v2, vcc -; GCN-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v2 +; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, 0, 12, vcc +; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v2 ; GCN-IEEE-SAFE-NEXT: v_add_f32_e32 v0, v0, v1 ; GCN-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1 @@ -2416,14 +2410,12 @@ define float @v_rsq_f32_known_never_posdenormal(float nofpclass(psub) %val) { ; GCN-IEEE-SAFE: ; %bb.0: ; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x800000 -; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v1, 0x4b800000 ; GCN-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 -; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, 0, 24, vcc +; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GCN-IEEE-SAFE-NEXT: v_rsq_f32_e32 v0, v0 -; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v1, 0x45800000 -; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc -; GCN-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v1 +; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc +; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v1 ; GCN-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1 %div = fdiv contract float 1.0, %sqrt, !fpmath !1