@@ -1880,6 +1880,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1880
1880
}
1881
1881
case TargetOpcode::G_FPTOUI:
1882
1882
case TargetOpcode::G_FPTOSI:
1883
+ case TargetOpcode::G_FPTOUI_SAT:
1884
+ case TargetOpcode::G_FPTOSI_SAT:
1883
1885
return narrowScalarFPTOI (MI, TypeIdx, NarrowTy);
1884
1886
case TargetOpcode::G_FPEXT:
1885
1887
if (TypeIdx != 0 )
@@ -2872,6 +2874,47 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2872
2874
else
2873
2875
widenScalarSrc (MI, WideTy, 1 , TargetOpcode::G_ZEXT);
2874
2876
2877
+ Observer.changedInstr (MI);
2878
+ return Legalized;
2879
+ case TargetOpcode::G_FPTOSI_SAT:
2880
+ case TargetOpcode::G_FPTOUI_SAT:
2881
+ Observer.changingInstr (MI);
2882
+
2883
+ if (TypeIdx == 0 ) {
2884
+ Register OldDst = MI.getOperand (0 ).getReg ();
2885
+ LLT Ty = MRI.getType (OldDst);
2886
+ Register ExtReg = MRI.createGenericVirtualRegister (WideTy);
2887
+ Register NewDst;
2888
+ MI.getOperand (0 ).setReg (ExtReg);
2889
+ uint64_t ShortBits = Ty.getScalarSizeInBits ();
2890
+ uint64_t WideBits = WideTy.getScalarSizeInBits ();
2891
+ MIRBuilder.setInsertPt (MIRBuilder.getMBB (), ++MIRBuilder.getInsertPt ());
2892
+ if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
2893
+ // z = i16 fptosi_sat(a)
2894
+ // ->
2895
+ // x = i32 fptosi_sat(a)
2896
+ // y = smin(x, 32767)
2897
+ // z = smax(y, -32768)
2898
+ auto MaxVal = MIRBuilder.buildConstant (
2899
+ WideTy, APInt::getSignedMaxValue (ShortBits).sext (WideBits));
2900
+ auto MinVal = MIRBuilder.buildConstant (
2901
+ WideTy, APInt::getSignedMinValue (ShortBits).sext (WideBits));
2902
+ Register MidReg =
2903
+ MIRBuilder.buildSMin (WideTy, ExtReg, MaxVal).getReg (0 );
2904
+ NewDst = MIRBuilder.buildSMax (WideTy, MidReg, MinVal).getReg (0 );
2905
+ } else {
2906
+ // z = i16 fptoui_sat(a)
2907
+ // ->
2908
+ // x = i32 fptoui_sat(a)
2909
+ // y = umin(x, 65535)
2910
+ auto MaxVal = MIRBuilder.buildConstant (
2911
+ WideTy, APInt::getAllOnes (ShortBits).zext (WideBits));
2912
+ NewDst = MIRBuilder.buildUMin (WideTy, ExtReg, MaxVal).getReg (0 );
2913
+ }
2914
+ MIRBuilder.buildTrunc (OldDst, NewDst);
2915
+ } else
2916
+ widenScalarSrc (MI, WideTy, 1 , TargetOpcode::G_FPEXT);
2917
+
2875
2918
Observer.changedInstr (MI);
2876
2919
return Legalized;
2877
2920
case TargetOpcode::G_LOAD:
@@ -4170,6 +4213,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4170
4213
return lowerFPTOUI (MI);
4171
4214
case G_FPTOSI:
4172
4215
return lowerFPTOSI (MI);
4216
+ case G_FPTOUI_SAT:
4217
+ case G_FPTOSI_SAT:
4218
+ return lowerFPTOINT_SAT (MI);
4173
4219
case G_FPTRUNC:
4174
4220
return lowerFPTRUNC (MI);
4175
4221
case G_FPOWI:
@@ -4982,6 +5028,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4982
5028
case G_UITOFP:
4983
5029
case G_FPTOSI:
4984
5030
case G_FPTOUI:
5031
+ case G_FPTOSI_SAT:
5032
+ case G_FPTOUI_SAT:
4985
5033
case G_INTTOPTR:
4986
5034
case G_PTRTOINT:
4987
5035
case G_ADDRSPACE_CAST:
@@ -5773,6 +5821,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
5773
5821
case TargetOpcode::G_FPEXT:
5774
5822
case TargetOpcode::G_FPTOSI:
5775
5823
case TargetOpcode::G_FPTOUI:
5824
+ case TargetOpcode::G_FPTOSI_SAT:
5825
+ case TargetOpcode::G_FPTOUI_SAT:
5776
5826
case TargetOpcode::G_SITOFP:
5777
5827
case TargetOpcode::G_UITOFP: {
5778
5828
Observer.changingInstr (MI);
@@ -7281,6 +7331,103 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
7281
7331
return Legalized;
7282
7332
}
7283
7333
7334
+ LegalizerHelper::LegalizeResult
7335
+ LegalizerHelper::lowerFPTOINT_SAT (MachineInstr &MI) {
7336
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs ();
7337
+
7338
+ bool IsSigned = MI.getOpcode () == TargetOpcode::G_FPTOSI_SAT;
7339
+ unsigned SatWidth = DstTy.getScalarSizeInBits ();
7340
+
7341
+ // Determine minimum and maximum integer values and their corresponding
7342
+ // floating-point values.
7343
+ APInt MinInt, MaxInt;
7344
+ if (IsSigned) {
7345
+ MinInt = APInt::getSignedMinValue (SatWidth);
7346
+ MaxInt = APInt::getSignedMaxValue (SatWidth);
7347
+ } else {
7348
+ MinInt = APInt::getMinValue (SatWidth);
7349
+ MaxInt = APInt::getMaxValue (SatWidth);
7350
+ }
7351
+
7352
+ const fltSemantics &Semantics = getFltSemanticForLLT (SrcTy.getScalarType ());
7353
+ APFloat MinFloat (Semantics);
7354
+ APFloat MaxFloat (Semantics);
7355
+
7356
+ APFloat::opStatus MinStatus =
7357
+ MinFloat.convertFromAPInt (MinInt, IsSigned, APFloat::rmTowardZero);
7358
+ APFloat::opStatus MaxStatus =
7359
+ MaxFloat.convertFromAPInt (MaxInt, IsSigned, APFloat::rmTowardZero);
7360
+ bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
7361
+ !(MaxStatus & APFloat::opStatus::opInexact);
7362
+
7363
+ // If the integer bounds are exactly representable as floats and min/max are
7364
+ // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
7365
+ // of comparisons and selects.
7366
+ bool MinMaxLegal = LI.isLegal ({TargetOpcode::G_FMINNUM, SrcTy}) &&
7367
+ LI.isLegal ({TargetOpcode::G_FMAXNUM, SrcTy});
7368
+ if (AreExactFloatBounds && MinMaxLegal) {
7369
+ // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
7370
+ auto Max = MIRBuilder.buildFMaxNum (
7371
+ SrcTy, Src, MIRBuilder.buildFConstant (SrcTy, MinFloat));
7372
+ // Clamp by MaxFloat from above. NaN cannot occur.
7373
+ auto Min = MIRBuilder.buildFMinNum (
7374
+ SrcTy, Max, MIRBuilder.buildFConstant (SrcTy, MaxFloat),
7375
+ MachineInstr::FmNoNans);
7376
+ // Convert clamped value to integer. In the unsigned case we're done,
7377
+ // because we mapped NaN to MinFloat, which will cast to zero.
7378
+ if (!IsSigned) {
7379
+ MIRBuilder.buildFPTOUI (Dst, Min);
7380
+ MI.eraseFromParent ();
7381
+ return Legalized;
7382
+ }
7383
+
7384
+ // Otherwise, select 0 if Src is NaN.
7385
+ auto FpToInt = MIRBuilder.buildFPTOSI (DstTy, Min);
7386
+ auto IsZero = MIRBuilder.buildFCmp (CmpInst::FCMP_UNO,
7387
+ DstTy.changeElementSize (1 ), Src, Src);
7388
+ MIRBuilder.buildSelect (Dst, IsZero, MIRBuilder.buildConstant (DstTy, 0 ),
7389
+ FpToInt);
7390
+ MI.eraseFromParent ();
7391
+ return Legalized;
7392
+ }
7393
+
7394
+ // Result of direct conversion. The assumption here is that the operation is
7395
+ // non-trapping and it's fine to apply it to an out-of-range value if we
7396
+ // select it away later.
7397
+ auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI (DstTy, Src)
7398
+ : MIRBuilder.buildFPTOUI (DstTy, Src);
7399
+
7400
+ // If Src ULT MinFloat, select MinInt. In particular, this also selects
7401
+ // MinInt if Src is NaN.
7402
+ auto ULT =
7403
+ MIRBuilder.buildFCmp (CmpInst::FCMP_ULT, SrcTy.changeElementSize (1 ), Src,
7404
+ MIRBuilder.buildFConstant (SrcTy, MinFloat));
7405
+ auto Max = MIRBuilder.buildSelect (
7406
+ DstTy, ULT, MIRBuilder.buildConstant (DstTy, MinInt), FpToInt);
7407
+ // If Src OGT MaxFloat, select MaxInt.
7408
+ auto OGT =
7409
+ MIRBuilder.buildFCmp (CmpInst::FCMP_OGT, SrcTy.changeElementSize (1 ), Src,
7410
+ MIRBuilder.buildFConstant (SrcTy, MaxFloat));
7411
+
7412
+ // In the unsigned case we are done, because we mapped NaN to MinInt, which
7413
+ // is already zero.
7414
+ if (!IsSigned) {
7415
+ MIRBuilder.buildSelect (Dst, OGT, MIRBuilder.buildConstant (DstTy, MaxInt),
7416
+ Max, MachineInstr::FmNoNans);
7417
+ MI.eraseFromParent ();
7418
+ return Legalized;
7419
+ }
7420
+
7421
+ // Otherwise, select 0 if Src is NaN.
7422
+ auto Min = MIRBuilder.buildSelect (
7423
+ DstTy, OGT, MIRBuilder.buildConstant (DstTy, MaxInt), Max);
7424
+ auto IsZero = MIRBuilder.buildFCmp (CmpInst::FCMP_UNO,
7425
+ DstTy.changeElementSize (1 ), Src, Src);
7426
+ MIRBuilder.buildSelect (Dst, IsZero, MIRBuilder.buildConstant (DstTy, 0 ), Min);
7427
+ MI.eraseFromParent ();
7428
+ return Legalized;
7429
+ }
7430
+
7284
7431
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
7285
7432
LegalizerHelper::LegalizeResult
7286
7433
LegalizerHelper::lowerFPTRUNC_F64_TO_F16 (MachineInstr &MI) {
0 commit comments