Skip to content

Commit b1fdd89

Browse files
committed
[CodeGen] [AMDGPU] Attempt DAGCombine for fmul with select to ldexp
For f32/f16 this combine brings no improvement, but for f64 this specific case of fmul with select is more costly to materialize than ldexp, so the following DAG combine is applied: fmul x, select(y, 2.0, 1.0) -> ldexp x, zext(i1 y); fmul x, select(y, 0.5, 1.0) -> ldexp x, sext(i1 y).
1 parent 5e3f615 commit b1fdd89

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -903,6 +903,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
903903
ISD::FADD,
904904
ISD::FSUB,
905905
ISD::FDIV,
906+
ISD::FMUL,
906907
ISD::FMINNUM,
907908
ISD::FMAXNUM,
908909
ISD::FMINNUM_IEEE,
@@ -14595,6 +14596,70 @@ SDValue SITargetLowering::performFDivCombine(SDNode *N,
1459514596
return SDValue();
1459614597
}
1459714598

14599+
// Try to rewrite an fmul whose second operand is a single-use select between
// two floating-point constants into an ldexp of the first operand.
//
// It is cheaper to realize i32 inline constants than to materialize f16 or
// f64 (or even non-inline f32) values, which ldexp makes possible:
//
// Given : A = 2^a  &  B = 2^b ; where a and b are integers.
// fmul x, (select y, A, B)     -> ldexp( x, (select i32 y, a, b) )
// fmul x, (select y, -A, -B)   -> ldexp( (fneg x), (select i32 y, a, b) )
//
// \param N   The ISD::FMUL node being combined.
// \param DCI Combiner state; only its SelectionDAG is used here.
// \returns the replacement FLDEXP value, or an empty SDValue when the pattern
//          does not match.
SDValue SITargetLowering::performFMulCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  EVT ScalarVT = VT.getScalarType();

  if (ScalarVT != MVT::f64 && ScalarVT != MVT::f32 && ScalarVT != MVT::f16)
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  if (!RHS.hasOneUse() || RHS.getOpcode() != ISD::SELECT)
    return SDValue();

  // Both select arms must be FP constants (or constant splats).
  const ConstantFPSDNode *TrueNode = isConstOrConstSplatFP(RHS.getOperand(1));
  if (!TrueNode)
    return SDValue();
  const ConstantFPSDNode *FalseNode = isConstOrConstSplatFP(RHS.getOperand(2));
  if (!FalseNode)
    return SDValue();

  // A single fneg on x can only absorb the sign if both arms share it.
  if (TrueNode->isNegative() != FalseNode->isNegative())
    return SDValue();

  // For f32, only non-inline constants should be transformed.
  const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
  if (ScalarVT == MVT::f32 &&
      TII->isInlineConstant(TrueNode->getValueAPF()) &&
      TII->isInlineConstant(FalseNode->getValueAPF()))
    return SDValue();

  // INT_MIN is treated as "magnitude has no exact integer log2"; bail out
  // before creating any new node so nothing dead is left in the DAG.
  int TrueNodeExpVal = TrueNode->getValueAPF().getExactLog2Abs();
  if (TrueNodeExpVal == INT_MIN)
    return SDValue();
  int FalseNodeExpVal = FalseNode->getValueAPF().getExactLog2Abs();
  if (FalseNodeExpVal == INT_MIN)
    return SDValue();

  SDLoc SL(N);
  EVT IntVT = VT.changeElementType(MVT::i32);

  // The exponents are signed (e.g. -1 for 0.5), so build them as signed
  // constants rather than passing a negative int through the unsigned
  // getConstant() overload.
  SDValue SelectNode =
      DAG.getNode(ISD::SELECT, SL, IntVT, RHS.getOperand(0),
                  DAG.getSignedConstant(TrueNodeExpVal, SL, IntVT),
                  DAG.getSignedConstant(FalseNodeExpVal, SL, IntVT));

  // Fold the common sign of the constants into the multiplicand.
  if (TrueNode->isNegative())
    LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS, LHS->getFlags());

  return DAG.getNode(ISD::FLDEXP, SL, VT, LHS, SelectNode, N->getFlags());
}
14662+
1459814663
SDValue SITargetLowering::performFMACombine(SDNode *N,
1459914664
DAGCombinerInfo &DCI) const {
1460014665
SelectionDAG &DAG = DCI.DAG;
@@ -14881,6 +14946,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
1488114946
return performFSubCombine(N, DCI);
1488214947
case ISD::FDIV:
1488314948
return performFDivCombine(N, DCI);
14949+
case ISD::FMUL:
14950+
return performFMulCombine(N, DCI);
1488414951
case ISD::SETCC:
1488514952
return performSetCCCombine(N, DCI);
1488614953
case ISD::FMAXNUM:

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
218218
SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
219219
SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
220220
SDValue performFDivCombine(SDNode *N, DAGCombinerInfo &DCI) const;
221+
SDValue performFMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
221222
SDValue performFMACombine(SDNode *N, DAGCombinerInfo &DCI) const;
222223
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
223224
SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;

0 commit comments

Comments
 (0)