Skip to content

Commit 1785d27

Browse files
committed
[GlobalISel][AArch64] Add G_FPTOSI_SAT/G_FPTOUI_SAT
This is an implementation of the saturating fp to int conversions for GlobalISel. On AArch64 the conversion instructions work this way, producing saturating results. LegalizerHelper::lowerFPTOINT_SAT is ported from SDAG. AArch64 has a lot of existing tests for fptosi_sat, covering a wide range of types. I have tried to make most of them work all at once, but a few fall back due to other missing features such as f128 handling for min/max.
1 parent 10fe531 commit 1785d27

File tree

16 files changed

+1387
-560
lines changed

16 files changed

+1387
-560
lines changed

llvm/docs/GlobalISel/GenericOpcode.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,11 @@ G_FPTOSI, G_FPTOUI, G_SITOFP, G_UITOFP
504504

505505
Convert between integer and floating point.
506506

507+
G_FPTOSI_SAT, G_FPTOUI_SAT
508+
^^^^^^^^^^^^^^^^^^^^^^^^^^
509+
510+
Saturating conversion from floating point to integer.
511+
507512
G_FABS
508513
^^^^^^
509514

llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -823,6 +823,8 @@ class GCastOp : public GenericMachineInstr {
823823
case TargetOpcode::G_FPEXT:
824824
case TargetOpcode::G_FPTOSI:
825825
case TargetOpcode::G_FPTOUI:
826+
case TargetOpcode::G_FPTOSI_SAT:
827+
case TargetOpcode::G_FPTOUI_SAT:
826828
case TargetOpcode::G_FPTRUNC:
827829
case TargetOpcode::G_INTTOPTR:
828830
case TargetOpcode::G_PTRTOINT:

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,7 @@ class LegalizerHelper {
398398
LegalizeResult lowerSITOFP(MachineInstr &MI);
399399
LegalizeResult lowerFPTOUI(MachineInstr &MI);
400400
LegalizeResult lowerFPTOSI(MachineInstr &MI);
401+
LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI);
401402

402403
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI);
403404
LegalizeResult lowerFPTRUNC(MachineInstr &MI);

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2000,6 +2000,16 @@ class MachineIRBuilder {
20002000
return buildInstr(TargetOpcode::G_FPTOSI, {Dst}, {Src0});
20012001
}
20022002

2003+
/// Build and insert \p Dst = G_FPTOUI_SAT \p Src0
2004+
MachineInstrBuilder buildFPTOUI_SAT(const DstOp &Dst, const SrcOp &Src0) {
2005+
return buildInstr(TargetOpcode::G_FPTOUI_SAT, {Dst}, {Src0});
2006+
}
2007+
2008+
/// Build and insert \p Dst = G_FPTOSI_SAT \p Src0
2009+
MachineInstrBuilder buildFPTOSI_SAT(const DstOp &Dst, const SrcOp &Src0) {
2010+
return buildInstr(TargetOpcode::G_FPTOSI_SAT, {Dst}, {Src0});
2011+
}
2012+
20032013
/// Build and insert \p Dst = G_INTRINSIC_ROUNDEVEN \p Src0, \p Src1
20042014
MachineInstrBuilder
20052015
buildIntrinsicRoundeven(const DstOp &Dst, const SrcOp &Src0,

llvm/include/llvm/Support/TargetOpcodes.def

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,12 @@ HANDLE_TARGET_OPCODE(G_SITOFP)
674674
/// Generic unsigned-int to float conversion
675675
HANDLE_TARGET_OPCODE(G_UITOFP)
676676

677+
/// Generic saturating float to signed-int conversion
678+
HANDLE_TARGET_OPCODE(G_FPTOSI_SAT)
679+
680+
/// Generic saturating float to unsigned-int conversion
681+
HANDLE_TARGET_OPCODE(G_FPTOUI_SAT)
682+
677683
/// Generic FP absolute value.
678684
HANDLE_TARGET_OPCODE(G_FABS)
679685

llvm/include/llvm/Target/GenericOpcodes.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,18 @@ def G_UITOFP : GenericInstruction {
769769
let hasSideEffects = false;
770770
}
771771

772+
def G_FPTOSI_SAT : GenericInstruction {
773+
let OutOperandList = (outs type0:$dst);
774+
let InOperandList = (ins type1:$src);
775+
let hasSideEffects = false;
776+
}
777+
778+
def G_FPTOUI_SAT : GenericInstruction {
779+
let OutOperandList = (outs type0:$dst);
780+
let InOperandList = (ins type1:$src);
781+
let hasSideEffects = false;
782+
}
783+
772784
def G_FABS : GenericInstruction {
773785
let OutOperandList = (outs type0:$dst);
774786
let InOperandList = (ins type0:$src);

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ def : GINodeEquiv<G_FPTOSI, fp_to_sint>;
9898
def : GINodeEquiv<G_FPTOUI, fp_to_uint>;
9999
def : GINodeEquiv<G_SITOFP, sint_to_fp>;
100100
def : GINodeEquiv<G_UITOFP, uint_to_fp>;
101+
def : GINodeEquiv<G_FPTOSI_SAT, fp_to_sint_sat>;
102+
def : GINodeEquiv<G_FPTOUI_SAT, fp_to_uint_sat>;
101103
def : GINodeEquiv<G_FADD, fadd>;
102104
def : GINodeEquiv<G_FSUB, fsub>;
103105
def : GINodeEquiv<G_FMA, fma>;

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2332,6 +2332,14 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
23322332
MachineInstr::copyFlagsFromInstruction(CI));
23332333
return true;
23342334
}
2335+
case Intrinsic::fptosi_sat:
2336+
MIRBuilder.buildFPTOSI_SAT(getOrCreateVReg(CI),
2337+
getOrCreateVReg(*CI.getArgOperand(0)));
2338+
return true;
2339+
case Intrinsic::fptoui_sat:
2340+
MIRBuilder.buildFPTOUI_SAT(getOrCreateVReg(CI),
2341+
getOrCreateVReg(*CI.getArgOperand(0)));
2342+
return true;
23352343
case Intrinsic::memcpy_inline:
23362344
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
23372345
case Intrinsic::memcpy:

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1880,6 +1880,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
18801880
}
18811881
case TargetOpcode::G_FPTOUI:
18821882
case TargetOpcode::G_FPTOSI:
1883+
case TargetOpcode::G_FPTOUI_SAT:
1884+
case TargetOpcode::G_FPTOSI_SAT:
18831885
return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
18841886
case TargetOpcode::G_FPEXT:
18851887
if (TypeIdx != 0)
@@ -2872,6 +2874,47 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
28722874
else
28732875
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
28742876

2877+
Observer.changedInstr(MI);
2878+
return Legalized;
2879+
case TargetOpcode::G_FPTOSI_SAT:
2880+
case TargetOpcode::G_FPTOUI_SAT:
2881+
Observer.changingInstr(MI);
2882+
2883+
if (TypeIdx == 0) {
2884+
Register OldDst = MI.getOperand(0).getReg();
2885+
LLT Ty = MRI.getType(OldDst);
2886+
Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
2887+
Register NewDst;
2888+
MI.getOperand(0).setReg(ExtReg);
2889+
uint64_t ShortBits = Ty.getScalarSizeInBits();
2890+
uint64_t WideBits = WideTy.getScalarSizeInBits();
2891+
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2892+
if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
2893+
// z = i16 fptosi_sat(a)
2894+
// ->
2895+
// x = i32 fptosi_sat(a)
2896+
// y = smin(x, 32767)
2897+
// z = smax(y, -32768)
2898+
auto MaxVal = MIRBuilder.buildConstant(
2899+
WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
2900+
auto MinVal = MIRBuilder.buildConstant(
2901+
WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
2902+
Register MidReg =
2903+
MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
2904+
NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
2905+
} else {
2906+
// z = i16 fptoui_sat(a)
2907+
// ->
2908+
// x = i32 fptoui_sat(a)
2909+
// y = umin(x, 65535)
2910+
auto MaxVal = MIRBuilder.buildConstant(
2911+
WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
2912+
NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
2913+
}
2914+
MIRBuilder.buildTrunc(OldDst, NewDst);
2915+
} else
2916+
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2917+
28752918
Observer.changedInstr(MI);
28762919
return Legalized;
28772920
case TargetOpcode::G_LOAD:
@@ -4170,6 +4213,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
41704213
return lowerFPTOUI(MI);
41714214
case G_FPTOSI:
41724215
return lowerFPTOSI(MI);
4216+
case G_FPTOUI_SAT:
4217+
case G_FPTOSI_SAT:
4218+
return lowerFPTOINT_SAT(MI);
41734219
case G_FPTRUNC:
41744220
return lowerFPTRUNC(MI);
41754221
case G_FPOWI:
@@ -4982,6 +5028,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
49825028
case G_UITOFP:
49835029
case G_FPTOSI:
49845030
case G_FPTOUI:
5031+
case G_FPTOSI_SAT:
5032+
case G_FPTOUI_SAT:
49855033
case G_INTTOPTR:
49865034
case G_PTRTOINT:
49875035
case G_ADDRSPACE_CAST:
@@ -5773,6 +5821,8 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
57735821
case TargetOpcode::G_FPEXT:
57745822
case TargetOpcode::G_FPTOSI:
57755823
case TargetOpcode::G_FPTOUI:
5824+
case TargetOpcode::G_FPTOSI_SAT:
5825+
case TargetOpcode::G_FPTOUI_SAT:
57765826
case TargetOpcode::G_SITOFP:
57775827
case TargetOpcode::G_UITOFP: {
57785828
Observer.changingInstr(MI);
@@ -7281,6 +7331,103 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
72817331
return Legalized;
72827332
}
72837333

7334+
LegalizerHelper::LegalizeResult
7335+
LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
7336+
auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
7337+
7338+
bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
7339+
unsigned SatWidth = DstTy.getScalarSizeInBits();
7340+
7341+
// Determine minimum and maximum integer values and their corresponding
7342+
// floating-point values.
7343+
APInt MinInt, MaxInt;
7344+
if (IsSigned) {
7345+
MinInt = APInt::getSignedMinValue(SatWidth);
7346+
MaxInt = APInt::getSignedMaxValue(SatWidth);
7347+
} else {
7348+
MinInt = APInt::getMinValue(SatWidth);
7349+
MaxInt = APInt::getMaxValue(SatWidth);
7350+
}
7351+
7352+
const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
7353+
APFloat MinFloat(Semantics);
7354+
APFloat MaxFloat(Semantics);
7355+
7356+
APFloat::opStatus MinStatus =
7357+
MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
7358+
APFloat::opStatus MaxStatus =
7359+
MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
7360+
bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
7361+
!(MaxStatus & APFloat::opStatus::opInexact);
7362+
7363+
// If the integer bounds are exactly representable as floats and min/max are
7364+
// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
7365+
// of comparisons and selects.
7366+
bool MinMaxLegal = LI.isLegal({TargetOpcode::G_FMINNUM, SrcTy}) &&
7367+
LI.isLegal({TargetOpcode::G_FMAXNUM, SrcTy});
7368+
if (AreExactFloatBounds && MinMaxLegal) {
7369+
// Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
7370+
auto Max = MIRBuilder.buildFMaxNum(
7371+
SrcTy, Src, MIRBuilder.buildFConstant(SrcTy, MinFloat));
7372+
// Clamp by MaxFloat from above. NaN cannot occur.
7373+
auto Min = MIRBuilder.buildFMinNum(
7374+
SrcTy, Max, MIRBuilder.buildFConstant(SrcTy, MaxFloat),
7375+
MachineInstr::FmNoNans);
7376+
// Convert clamped value to integer. In the unsigned case we're done,
7377+
// because we mapped NaN to MinFloat, which will cast to zero.
7378+
if (!IsSigned) {
7379+
MIRBuilder.buildFPTOUI(Dst, Min);
7380+
MI.eraseFromParent();
7381+
return Legalized;
7382+
}
7383+
7384+
// Otherwise, select 0 if Src is NaN.
7385+
auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
7386+
auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
7387+
DstTy.changeElementSize(1), Src, Src);
7388+
MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
7389+
FpToInt);
7390+
MI.eraseFromParent();
7391+
return Legalized;
7392+
}
7393+
7394+
// Result of direct conversion. The assumption here is that the operation is
7395+
// non-trapping and it's fine to apply it to an out-of-range value if we
7396+
// select it away later.
7397+
auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
7398+
: MIRBuilder.buildFPTOUI(DstTy, Src);
7399+
7400+
// If Src ULT MinFloat, select MinInt. In particular, this also selects
7401+
// MinInt if Src is NaN.
7402+
auto ULT =
7403+
MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
7404+
MIRBuilder.buildFConstant(SrcTy, MinFloat));
7405+
auto Max = MIRBuilder.buildSelect(
7406+
DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
7407+
// If Src OGT MaxFloat, select MaxInt.
7408+
auto OGT =
7409+
MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
7410+
MIRBuilder.buildFConstant(SrcTy, MaxFloat));
7411+
7412+
// In the unsigned case we are done, because we mapped NaN to MinInt, which
7413+
// is already zero.
7414+
if (!IsSigned) {
7415+
MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
7416+
Max, MachineInstr::FmNoNans);
7417+
MI.eraseFromParent();
7418+
return Legalized;
7419+
}
7420+
7421+
// Otherwise, select 0 if Src is NaN.
7422+
auto Min = MIRBuilder.buildSelect(
7423+
DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
7424+
auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
7425+
DstTy.changeElementSize(1), Src, Src);
7426+
MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
7427+
MI.eraseFromParent();
7428+
return Legalized;
7429+
}
7430+
72847431
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
72857432
LegalizerHelper::LegalizeResult
72867433
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2137,6 +2137,12 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
21372137
}
21382138
return false;
21392139
}
2140+
case TargetOpcode::G_FPTOSI_SAT:
2141+
I.setDesc(TII.get(TargetOpcode::G_FPTOSI));
2142+
return true;
2143+
case TargetOpcode::G_FPTOUI_SAT:
2144+
I.setDesc(TII.get(TargetOpcode::G_FPTOUI));
2145+
return true;
21402146
default:
21412147
return false;
21422148
}

0 commit comments

Comments
 (0)