@@ -903,6 +903,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
903
903
ISD::FADD,
904
904
ISD::FSUB,
905
905
ISD::FDIV,
906
+ ISD::FMUL,
906
907
ISD::FMINNUM,
907
908
ISD::FMAXNUM,
908
909
ISD::FMINNUM_IEEE,
@@ -14595,6 +14596,70 @@ SDValue SITargetLowering::performFDivCombine(SDNode *N,
14595
14596
return SDValue();
14596
14597
}
14597
14598
14599
+ SDValue SITargetLowering::performFMulCombine(SDNode *N,
14600
+ DAGCombinerInfo &DCI) const {
14601
+ SelectionDAG &DAG = DCI.DAG;
14602
+ EVT VT = N->getValueType(0);
14603
+ EVT scalarVT = VT.getScalarType();
14604
+ EVT IntVT = VT.changeElementType(MVT::i32);
14605
+
14606
+ SDLoc SL(N);
14607
+ SDValue LHS = N->getOperand(0);
14608
+ SDValue RHS = N->getOperand(1);
14609
+
14610
+ SDNodeFlags Flags = N->getFlags();
14611
+ SDNodeFlags LHSFlags = LHS->getFlags();
14612
+
14613
+ // It is cheaper to realize i32 inline constants as compared against
14614
+ // as materializing f16 or f64 (or even non-inline f32) values,
14615
+ // possible via ldexp usage, as shown below :
14616
+ //
14617
+ // Given : A = 2^a & B = 2^b ; where a and b are integers.
14618
+ // fmul x, (select y, A, B) -> ldexp( x, (select i32 y, a, b) )
14619
+ // fmul x, (select y, -A, -B) -> ldexp( (fneg x), (select i32 y, a, b) )
14620
+ if (scalarVT == MVT::f64 || scalarVT == MVT::f32 || scalarVT == MVT::f16) {
14621
+ if (RHS.hasOneUse() && RHS.getOpcode() == ISD::SELECT) {
14622
+ const ConstantFPSDNode *TrueNode =
14623
+ isConstOrConstSplatFP(RHS.getOperand(1));
14624
+ if (!TrueNode)
14625
+ return SDValue();
14626
+ const ConstantFPSDNode *FalseNode =
14627
+ isConstOrConstSplatFP(RHS.getOperand(2));
14628
+ if (!FalseNode)
14629
+ return SDValue();
14630
+
14631
+ if (TrueNode->isNegative() != FalseNode->isNegative())
14632
+ return SDValue();
14633
+
14634
+ // For f32, only non-inline constants should be transformed.
14635
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
14636
+ if (scalarVT == MVT::f32 &&
14637
+ TII->isInlineConstant(TrueNode->getValueAPF()) &&
14638
+ TII->isInlineConstant(FalseNode->getValueAPF()))
14639
+ return SDValue();
14640
+
14641
+ LHS = TrueNode->isNegative()
14642
+ ? DAG.getNode(ISD::FNEG, SL, VT, LHS, LHSFlags)
14643
+ : LHS;
14644
+
14645
+ int TrueNodeExpVal = TrueNode->getValueAPF().getExactLog2Abs();
14646
+ if (TrueNodeExpVal == INT_MIN)
14647
+ return SDValue();
14648
+ int FalseNodeExpVal = FalseNode->getValueAPF().getExactLog2Abs();
14649
+ if (FalseNodeExpVal == INT_MIN)
14650
+ return SDValue();
14651
+
14652
+ SDValue SelectNode =
14653
+ DAG.getNode(ISD::SELECT, SL, IntVT, RHS.getOperand(0),
14654
+ DAG.getConstant(TrueNodeExpVal, SL, IntVT),
14655
+ DAG.getConstant(FalseNodeExpVal, SL, IntVT));
14656
+ return DAG.getNode(ISD::FLDEXP, SL, VT, LHS, SelectNode, Flags);
14657
+ }
14658
+ }
14659
+
14660
+ return SDValue();
14661
+ }
14662
+
14598
14663
SDValue SITargetLowering::performFMACombine(SDNode *N,
14599
14664
DAGCombinerInfo &DCI) const {
14600
14665
SelectionDAG &DAG = DCI.DAG;
@@ -14881,6 +14946,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
14881
14946
return performFSubCombine(N, DCI);
14882
14947
case ISD::FDIV:
14883
14948
return performFDivCombine(N, DCI);
14949
+ case ISD::FMUL:
14950
+ return performFMulCombine(N, DCI);
14884
14951
case ISD::SETCC:
14885
14952
return performSetCCCombine(N, DCI);
14886
14953
case ISD::FMAXNUM:
0 commit comments