Skip to content

[LV] Vectorize FMax w/o fast-math flags. #146711

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions llvm/include/llvm/Analysis/IVDescriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ enum class RecurKind {
FMul, ///< Product of floats.
FMin, ///< FP min implemented in terms of select(cmp()).
FMax, ///< FP max implemented in terms of select(cmp()).
FMaxNoFMFs, ///< FP max implemented in terms of select(cmp()), but without
///< any fast-math flags. Users need to handle NaNs and signed zeros when
///< generating code.
FMinimum, ///< FP min with llvm.minimum semantics
FMaximum, ///< FP max with llvm.maximum semantics
FMinimumNum, ///< FP min with llvm.minimumnum semantics
Expand All @@ -57,6 +59,9 @@ enum class RecurKind {
FindFirstIVSMin, ///< FindFirst reduction with select(icmp(),x,y) where one of
///< (x,y) is a decreasing loop induction, and both x and y
///< are integer type, producing a SMin reduction.
FindFirstIVUMin, ///< FindFirst reduction with select(icmp(),x,y) where one of
///< (x,y) is a decreasing loop induction, and both x and y
///< are integer type, producing a UMin reduction.
FindLastIVSMax, ///< FindLast reduction with select(cmp(),x,y) where one of
///< (x,y) is increasing loop induction, and both x and y
///< are integer type, producing a SMax reduction.
Expand Down Expand Up @@ -247,8 +252,9 @@ class RecurrenceDescriptor {
/// Returns true if the recurrence kind is a floating-point min/max kind.
/// That covers the select(cmp())-based FMin/FMax kinds, FMaxNoFMFs (FMax
/// without any fast-math flags), and the kinds with llvm.minimum/maximum and
/// llvm.minimumnum/maximumnum semantics.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum ||
Kind == RecurKind::FMinimumNum || Kind == RecurKind::FMaximumNum;
Kind == RecurKind::FMaxNoFMFs || Kind == RecurKind::FMinimum ||
Kind == RecurKind::FMaximum || Kind == RecurKind::FMinimumNum ||
Kind == RecurKind::FMaximumNum;
}

/// Returns true if the recurrence kind is any min/max kind.
Expand All @@ -265,7 +271,8 @@ class RecurrenceDescriptor {
/// Returns true if the recurrence kind is of the form
/// select(cmp(),x,y) where one of (x,y) is decreasing loop induction.
/// Both the signed (FindFirstIVSMin) and the unsigned (FindFirstIVUMin)
/// variants qualify.
static bool isFindFirstIVRecurrenceKind(RecurKind Kind) {
return Kind == RecurKind::FindFirstIVSMin;
return Kind == RecurKind::FindFirstIVSMin ||
Kind == RecurKind::FindFirstIVUMin;
}

/// Returns true if the recurrence kind is of the form
Expand Down
26 changes: 18 additions & 8 deletions llvm/lib/Analysis/IVDescriptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
case RecurKind::UMin:
case RecurKind::AnyOf:
case RecurKind::FindFirstIVSMin:
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
return true;
Expand Down Expand Up @@ -741,10 +742,9 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
: APInt::getMinValue(NumBits);
ValidRange = ConstantRange::getNonEmpty(Sentinel + 1, Sentinel);
} else {
assert(IsSigned && "Only FindFirstIV with SMax is supported currently");
ValidRange =
ConstantRange::getNonEmpty(APInt::getSignedMinValue(NumBits),
APInt::getSignedMaxValue(NumBits) - 1);
APInt Sentinel = IsSigned ? APInt::getSignedMaxValue(NumBits)
: APInt::getMaxValue(NumBits);
ValidRange = ConstantRange::getNonEmpty(Sentinel, Sentinel - 1);
}

LLVM_DEBUG(dbgs() << "LV: "
Expand All @@ -770,6 +770,8 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,

if (CheckRange(true))
return RecurKind::FindFirstIVSMin;
if (CheckRange(false))
return RecurKind::FindFirstIVUMin;
return std::nullopt;
};

Expand Down Expand Up @@ -815,7 +817,8 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
if (match(I, m_OrdOrUnordFMin(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_OrdOrUnordFMax(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMax, I);
return InstDesc(Kind == RecurKind::FMax || Kind == RecurKind::FMaxNoFMFs,
I);
if (match(I, m_FMinNum(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_FMaxNum(m_Value(), m_Value())))
Expand Down Expand Up @@ -937,10 +940,15 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
m_Intrinsic<Intrinsic::minimumnum>(m_Value(), m_Value())) ||
match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(), m_Value()));
};
if (isIntMinMaxRecurrenceKind(Kind) ||
(HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
if (isIntMinMaxRecurrenceKind(Kind))
return isMinMaxPattern(I, Kind, Prev);
else if (isFMulAddIntrinsic(I))
if (isFPMinMaxRecurrenceKind(Kind)) {
if (HasRequiredFMF())
return isMinMaxPattern(I, Kind, Prev);
if ((Kind == RecurKind::FMax || Kind == RecurKind::FMaxNoFMFs) &&
isMinMaxPattern(I, Kind, Prev).isRecurrence())
return InstDesc(I, RecurKind::FMaxNoFMFs);
} else if (isFMulAddIntrinsic(I))
return InstDesc(Kind == RecurKind::FMulAdd, I,
I->hasAllowReassoc() ? nullptr : I);
return InstDesc(false, I);
Expand Down Expand Up @@ -1183,6 +1191,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
return Instruction::Mul;
case RecurKind::AnyOf:
case RecurKind::FindFirstIVSMin:
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::Or:
Expand All @@ -1202,6 +1211,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
case RecurKind::UMin:
return Instruction::ICmp;
case RecurKind::FMax:
case RecurKind::FMaxNoFMFs:
case RecurKind::FMin:
case RecurKind::FMaximum:
case RecurKind::FMinimum:
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -937,6 +937,7 @@ constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) {
return Intrinsic::vector_reduce_umax;
case RecurKind::UMin:
return Intrinsic::vector_reduce_umin;
case RecurKind::FMaxNoFMFs:
case RecurKind::FMax:
return Intrinsic::vector_reduce_fmax;
case RecurKind::FMin:
Expand Down Expand Up @@ -1085,6 +1086,7 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
case RecurKind::FMin:
return CmpInst::FCMP_OLT;
case RecurKind::FMax:
case RecurKind::FMaxNoFMFs:
return CmpInst::FCMP_OGT;
// We do not add FMinimum/FMaximum recurrence kind here since there is no
// equivalent predicate which compares signed zeroes according to the
Expand Down Expand Up @@ -1307,6 +1309,7 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
case RecurKind::UMax:
case RecurKind::UMin:
case RecurKind::FMax:
case RecurKind::FMaxNoFMFs:
case RecurKind::FMin:
case RecurKind::FMinimum:
case RecurKind::FMaximum:
Expand Down
39 changes: 19 additions & 20 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4459,8 +4459,13 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
ElementCount VF) const {
// Cross iteration phis such as reductions need special handling and are
// currently unsupported.
if (any_of(OrigLoop->getHeader()->phis(),
[&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); }))
if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) {
return Legal->isFixedOrderRecurrence(&Phi) ||
(Legal->isReductionVariable(&Phi) &&
Legal->getReductionVars()
.find(&Phi)
->second.getRecurrenceKind() == RecurKind::FMaxNoFMFs);
}))
return false;

// Phis with uses outside of the loop require special handling and are
Expand Down Expand Up @@ -7267,8 +7272,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(

auto *EpiRedHeaderPhi =
cast<VPReductionPHIRecipe>(EpiRedResult->getOperand(0));
const RecurrenceDescriptor &RdxDesc =
EpiRedHeaderPhi->getRecurrenceDescriptor();
RecurKind Kind = EpiRedHeaderPhi->getRecurrenceKind();
Value *MainResumeValue;
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())) {
assert((VPI->getOpcode() == VPInstruction::Broadcast ||
Expand All @@ -7277,8 +7281,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
} else
MainResumeValue = EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind)) {
[[maybe_unused]] Value *StartV =
EpiRedResult->getOperand(1)->getLiveInIRValue();
auto *Cmp = cast<ICmpInst>(MainResumeValue);
Expand All @@ -7288,8 +7291,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
"AnyOf expected to start by comparing main resume value to original "
"start value");
MainResumeValue = Cmp->getOperand(0);
} else if (RecurrenceDescriptor::isFindIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
} else if (RecurrenceDescriptor::isFindIVRecurrenceKind(Kind)) {
Value *StartV = getStartValueFromReductionResult(EpiRedResult);
Value *SentinelV = EpiRedResult->getOperand(2)->getLiveInIRValue();
using namespace llvm::PatternMatch;
Expand Down Expand Up @@ -8912,6 +8914,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(

// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);
if (!VPlanTransforms::runPass(
VPlanTransforms::handleFMaxReductionsWithoutFastMath, *Plan))
return nullptr;

// Transform recipes to abstract recipes if it is legal and beneficial and
// clamp the range for better cost estimation.
Expand Down Expand Up @@ -9070,8 +9075,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
continue;

const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
RecurKind Kind = RdxDesc.getRecurrenceKind();
RecurKind Kind = PhiR->getRecurrenceKind();
assert(
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
Expand Down Expand Up @@ -9177,6 +9181,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(CurrentLinkI->getParent()))
CondOp = RecipeBuilder.getBlockInMask(CurrentLink->getParent());

const RecurrenceDescriptor &RdxDesc = Legal->getReductionVars().lookup(
cast<PHINode>(PhiR->getUnderlyingInstr()));
// Non-FP RdxDescs will have all fast math flags set, so clear them.
FastMathFlags FMFs = isa<FPMathOperator>(CurrentLinkI)
? RdxDesc.getFastMathFlags()
Expand Down Expand Up @@ -9207,7 +9213,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (!PhiR)
continue;

const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
const RecurrenceDescriptor &RdxDesc = Legal->getReductionVars().lookup(
cast<PHINode>(PhiR->getUnderlyingInstr()));
Type *PhiTy = PhiR->getUnderlyingValue()->getType();
// If tail is folded by masking, introduce selects between the phi
// and the users outside the vector region of each reduction, at the
Expand Down Expand Up @@ -9853,14 +9860,9 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
}));
ResumeV = cast<PHINode>(ReductionPhi->getUnderlyingInstr())
->getIncomingValueForBlock(L->getLoopPreheader());
const RecurrenceDescriptor &RdxDesc =
ReductionPhi->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
RecurKind RK = ReductionPhi->getRecurrenceKind();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
Value *StartV = RdxResult->getOperand(1)->getLiveInIRValue();
assert(RdxDesc.getRecurrenceStartValue() == StartV &&
"start value from ComputeAnyOfResult must match");

// VPReductionPHIRecipes for AnyOf reductions expect a boolean as
// start value; compare the final value from the main vector loop
// to the start value.
Expand All @@ -9869,9 +9871,6 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
ResumeV = Builder.CreateICmpNE(ResumeV, StartV);
} else if (RecurrenceDescriptor::isFindIVRecurrenceKind(RK)) {
Value *StartV = getStartValueFromReductionResult(RdxResult);
assert(RdxDesc.getRecurrenceStartValue() == StartV &&
"start value from ComputeFinIVResult must match");

ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
EPI.MainLoopIterationCountCheck);

Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23181,6 +23181,7 @@ class HorizontalReduction {
case RecurKind::FMulAdd:
case RecurKind::AnyOf:
case RecurKind::FindFirstIVSMin:
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaximumNum:
Expand Down Expand Up @@ -23317,6 +23318,7 @@ class HorizontalReduction {
case RecurKind::FMulAdd:
case RecurKind::AnyOf:
case RecurKind::FindFirstIVSMin:
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaximumNum:
Expand Down Expand Up @@ -23418,8 +23420,10 @@ class HorizontalReduction {
case RecurKind::FMulAdd:
case RecurKind::AnyOf:
case RecurKind::FindFirstIVSMin:
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
case RecurKind::FMaxNoFMFs:
case RecurKind::FMaximumNum:
case RecurKind::FMinimumNum:
case RecurKind::None:
Expand Down
25 changes: 17 additions & 8 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -980,7 +980,10 @@ class VPInstruction : public VPRecipeWithIRFlags,
ReductionStartVector,
// Creates a step vector starting from 0 to VF with a step of 1.
StepVector,

/// Extracts a single lane (first operand) from a set of vector operands.
/// The lane specifies an index into a vector formed by combining all vector
/// operands (all operands after the first one).
ExtractLane,
};

private:
Expand Down Expand Up @@ -2192,7 +2195,7 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
public VPUnrollPartAccessor<2> {
/// The recurrence kind of the reduction.
const RecurrenceDescriptor &RdxDesc;
const RecurKind Kind;

/// The phi is part of an in-loop reduction.
bool IsInLoop;
Expand All @@ -2211,16 +2214,24 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
VPValue &Start, bool IsInLoop = false,
bool IsOrdered = false, unsigned VFScaleFactor = 1)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
VFScaleFactor(VFScaleFactor) {
Kind(RdxDesc.getRecurrenceKind()), IsInLoop(IsInLoop),
IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
}
/// Create a new VPReductionPHIRecipe for the reduction \p Phi with recurrence
/// kind \p Kind and start value \p Start. Unlike the overload taking a
/// RecurrenceDescriptor, the recurrence kind is passed in directly.
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start,
bool IsInLoop = false, bool IsOrdered = false,
unsigned VFScaleFactor = 1)
: VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
// An ordered reduction is only valid when it is also an in-loop reduction.
assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
}

~VPReductionPHIRecipe() override = default;

VPReductionPHIRecipe *clone() override {

auto *R = new VPReductionPHIRecipe(
dyn_cast_or_null<PHINode>(getUnderlyingValue()), RdxDesc,
dyn_cast_or_null<PHINode>(getUnderlyingValue()), getRecurrenceKind(),
*getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
R->addOperand(getBackedgeValue());
return R;
Expand All @@ -2240,9 +2251,7 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
VPSlotTracker &SlotTracker) const override;
#endif

/// Returns the recurrence descriptor of the reduction.
const RecurrenceDescriptor &getRecurrenceDescriptor() const {
return RdxDesc;
}
/// Returns the recurrence kind of the reduction.
RecurKind getRecurrenceKind() const { return Kind; }

/// Returns true, if the phi is part of an ordered reduction.
bool isOrdered() const { return IsOrdered; }
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
return ResTy;
}
case Instruction::ICmp:
case Instruction::FCmp:
case VPInstruction::ActiveLaneMask:
assert(inferScalarType(R->getOperand(0)) ==
inferScalarType(R->getOperand(1)) &&
Expand All @@ -110,6 +111,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::BuildStructVector:
case VPInstruction::BuildVector:
return SetResultTyFromOp();
case VPInstruction::ExtractLane:
return inferScalarType(R->getOperand(1));
case VPInstruction::FirstActiveLane:
return Type::getIntNTy(Ctx, 64);
case VPInstruction::ExtractLastElement:
Expand Down
Loading
Loading