@@ -1880,6 +1880,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1880
1880
}
1881
1881
case TargetOpcode::G_FPTOUI:
1882
1882
case TargetOpcode::G_FPTOSI:
1883
+ case TargetOpcode::G_FPTOUI_SAT:
1884
+ case TargetOpcode::G_FPTOSI_SAT:
1883
1885
return narrowScalarFPTOI (MI, TypeIdx, NarrowTy);
1884
1886
case TargetOpcode::G_FPEXT:
1885
1887
if (TypeIdx != 0 )
@@ -2872,6 +2874,47 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2872
2874
else
2873
2875
widenScalarSrc (MI, WideTy, 1 , TargetOpcode::G_ZEXT);
2874
2876
2877
+ Observer.changedInstr (MI);
2878
+ return Legalized;
2879
+ case TargetOpcode::G_FPTOSI_SAT:
2880
+ case TargetOpcode::G_FPTOUI_SAT:
2881
+ Observer.changingInstr (MI);
2882
+
2883
+ if (TypeIdx == 0 ) {
2884
+ Register OldDst = MI.getOperand (0 ).getReg ();
2885
+ LLT Ty = MRI.getType (OldDst);
2886
+ Register ExtReg = MRI.createGenericVirtualRegister (WideTy);
2887
+ Register NewDst;
2888
+ MI.getOperand (0 ).setReg (ExtReg);
2889
+ uint64_t ShortBits = Ty.getScalarSizeInBits ();
2890
+ uint64_t WideBits = WideTy.getScalarSizeInBits ();
2891
+ MIRBuilder.setInsertPt (MIRBuilder.getMBB (), ++MIRBuilder.getInsertPt ());
2892
+ if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
2893
+ // z = i16 fptosi_sat(a)
2894
+ // ->
2895
+ // x = i32 fptosi_sat(a)
2896
+ // y = smin(x, 32767)
2897
+ // z = smax(y, -32768)
2898
+ auto MaxVal = MIRBuilder.buildConstant (
2899
+ WideTy, APInt::getSignedMaxValue (ShortBits).sext (WideBits));
2900
+ auto MinVal = MIRBuilder.buildConstant (
2901
+ WideTy, APInt::getSignedMinValue (ShortBits).sext (WideBits));
2902
+ Register MidReg =
2903
+ MIRBuilder.buildSMin (WideTy, ExtReg, MaxVal).getReg (0 );
2904
+ NewDst = MIRBuilder.buildSMax (WideTy, MidReg, MinVal).getReg (0 );
2905
+ } else {
2906
+ // z = i16 fptoui_sat(a)
2907
+ // ->
2908
+ // x = i32 fptoui_sat(a)
2909
+ // y = umin(x, 65535)
2910
+ auto MaxVal = MIRBuilder.buildConstant (
2911
+ WideTy, APInt::getAllOnes (ShortBits).zext (WideBits));
2912
+ NewDst = MIRBuilder.buildUMin (WideTy, ExtReg, MaxVal).getReg (0 );
2913
+ }
2914
+ MIRBuilder.buildTrunc (OldDst, NewDst);
2915
+ } else
2916
+ widenScalarSrc (MI, WideTy, 1 , TargetOpcode::G_FPEXT);
2917
+
2875
2918
Observer.changedInstr (MI);
2876
2919
return Legalized;
2877
2920
case TargetOpcode::G_LOAD:
@@ -4170,6 +4213,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4170
4213
return lowerFPTOUI (MI);
4171
4214
case G_FPTOSI:
4172
4215
return lowerFPTOSI (MI);
4216
+ case G_FPTOUI_SAT:
4217
+ case G_FPTOSI_SAT:
4218
+ return lowerFPTOINT_SAT (MI);
4173
4219
case G_FPTRUNC:
4174
4220
return lowerFPTRUNC (MI);
4175
4221
case G_FPOWI:
@@ -4986,6 +5032,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4986
5032
case G_UITOFP:
4987
5033
case G_FPTOSI:
4988
5034
case G_FPTOUI:
5035
+ case G_FPTOSI_SAT:
5036
+ case G_FPTOUI_SAT:
4989
5037
case G_INTTOPTR:
4990
5038
case G_PTRTOINT:
4991
5039
case G_ADDRSPACE_CAST:
@@ -5777,6 +5825,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
5777
5825
case TargetOpcode::G_FPEXT:
5778
5826
case TargetOpcode::G_FPTOSI:
5779
5827
case TargetOpcode::G_FPTOUI:
5828
+ case TargetOpcode::G_FPTOSI_SAT:
5829
+ case TargetOpcode::G_FPTOUI_SAT:
5780
5830
case TargetOpcode::G_SITOFP:
5781
5831
case TargetOpcode::G_UITOFP: {
5782
5832
Observer.changingInstr (MI);
@@ -7285,6 +7335,106 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
7285
7335
return Legalized;
7286
7336
}
7287
7337
7338
+ LegalizerHelper::LegalizeResult
7339
+ LegalizerHelper::lowerFPTOINT_SAT (MachineInstr &MI) {
7340
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs ();
7341
+
7342
+ bool IsSigned = MI.getOpcode () == TargetOpcode::G_FPTOSI_SAT;
7343
+ unsigned SatWidth = DstTy.getScalarSizeInBits ();
7344
+
7345
+ // Determine minimum and maximum integer values and their corresponding
7346
+ // floating-point values.
7347
+ APInt MinInt, MaxInt;
7348
+ if (IsSigned) {
7349
+ MinInt = APInt::getSignedMinValue (SatWidth);
7350
+ MaxInt = APInt::getSignedMaxValue (SatWidth);
7351
+ } else {
7352
+ MinInt = APInt::getMinValue (SatWidth);
7353
+ MaxInt = APInt::getMaxValue (SatWidth);
7354
+ }
7355
+
7356
+ const fltSemantics &Semantics = getFltSemanticForLLT (SrcTy.getScalarType ());
7357
+ APFloat MinFloat (Semantics);
7358
+ APFloat MaxFloat (Semantics);
7359
+
7360
+ APFloat::opStatus MinStatus =
7361
+ MinFloat.convertFromAPInt (MinInt, IsSigned, APFloat::rmTowardZero);
7362
+ APFloat::opStatus MaxStatus =
7363
+ MaxFloat.convertFromAPInt (MaxInt, IsSigned, APFloat::rmTowardZero);
7364
+ bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
7365
+ !(MaxStatus & APFloat::opStatus::opInexact);
7366
+
7367
+ // If the integer bounds are exactly representable as floats, emit a
7368
+ // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
7369
+ // and selects.
7370
+ if (AreExactFloatBounds) {
7371
+ // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
7372
+ auto MaxC = MIRBuilder.buildFConstant (SrcTy, MinFloat);
7373
+ auto MaxP = MIRBuilder.buildFCmp (CmpInst::FCMP_ULT,
7374
+ SrcTy.changeElementSize (1 ), Src, MaxC);
7375
+ auto Max = MIRBuilder.buildSelect (SrcTy, MaxP, Src, MaxC);
7376
+ // Clamp by MaxFloat from above. NaN cannot occur.
7377
+ auto MinC = MIRBuilder.buildFConstant (SrcTy, MaxFloat);
7378
+ auto MinP =
7379
+ MIRBuilder.buildFCmp (CmpInst::FCMP_OGT, SrcTy.changeElementSize (1 ), Max,
7380
+ MinC, MachineInstr::FmNoNans);
7381
+ auto Min =
7382
+ MIRBuilder.buildSelect (SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
7383
+ // Convert clamped value to integer. In the unsigned case we're done,
7384
+ // because we mapped NaN to MinFloat, which will cast to zero.
7385
+ if (!IsSigned) {
7386
+ MIRBuilder.buildFPTOUI (Dst, Min);
7387
+ MI.eraseFromParent ();
7388
+ return Legalized;
7389
+ }
7390
+
7391
+ // Otherwise, select 0 if Src is NaN.
7392
+ auto FpToInt = MIRBuilder.buildFPTOSI (DstTy, Min);
7393
+ auto IsZero = MIRBuilder.buildFCmp (CmpInst::FCMP_UNO,
7394
+ DstTy.changeElementSize (1 ), Src, Src);
7395
+ MIRBuilder.buildSelect (Dst, IsZero, MIRBuilder.buildConstant (DstTy, 0 ),
7396
+ FpToInt);
7397
+ MI.eraseFromParent ();
7398
+ return Legalized;
7399
+ }
7400
+
7401
+ // Result of direct conversion. The assumption here is that the operation is
7402
+ // non-trapping and it's fine to apply it to an out-of-range value if we
7403
+ // select it away later.
7404
+ auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI (DstTy, Src)
7405
+ : MIRBuilder.buildFPTOUI (DstTy, Src);
7406
+
7407
+ // If Src ULT MinFloat, select MinInt. In particular, this also selects
7408
+ // MinInt if Src is NaN.
7409
+ auto ULT =
7410
+ MIRBuilder.buildFCmp (CmpInst::FCMP_ULT, SrcTy.changeElementSize (1 ), Src,
7411
+ MIRBuilder.buildFConstant (SrcTy, MinFloat));
7412
+ auto Max = MIRBuilder.buildSelect (
7413
+ DstTy, ULT, MIRBuilder.buildConstant (DstTy, MinInt), FpToInt);
7414
+ // If Src OGT MaxFloat, select MaxInt.
7415
+ auto OGT =
7416
+ MIRBuilder.buildFCmp (CmpInst::FCMP_OGT, SrcTy.changeElementSize (1 ), Src,
7417
+ MIRBuilder.buildFConstant (SrcTy, MaxFloat));
7418
+
7419
+ // In the unsigned case we are done, because we mapped NaN to MinInt, which
7420
+ // is already zero.
7421
+ if (!IsSigned) {
7422
+ MIRBuilder.buildSelect (Dst, OGT, MIRBuilder.buildConstant (DstTy, MaxInt),
7423
+ Max);
7424
+ MI.eraseFromParent ();
7425
+ return Legalized;
7426
+ }
7427
+
7428
+ // Otherwise, select 0 if Src is NaN.
7429
+ auto Min = MIRBuilder.buildSelect (
7430
+ DstTy, OGT, MIRBuilder.buildConstant (DstTy, MaxInt), Max);
7431
+ auto IsZero = MIRBuilder.buildFCmp (CmpInst::FCMP_UNO,
7432
+ DstTy.changeElementSize (1 ), Src, Src);
7433
+ MIRBuilder.buildSelect (Dst, IsZero, MIRBuilder.buildConstant (DstTy, 0 ), Min);
7434
+ MI.eraseFromParent ();
7435
+ return Legalized;
7436
+ }
7437
+
7288
7438
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
7289
7439
LegalizerHelper::LegalizeResult
7290
7440
LegalizerHelper::lowerFPTRUNC_F64_TO_F16 (MachineInstr &MI) {
0 commit comments