diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 3ec6850d6f685..8e09e6f8d4935 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1872,6 +1872,16 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { } bool LoopVectorizationLegality::canFoldTailByMasking() const { + // The only loops we can vectorize without a scalar epilogue, are loops with + // a bottom-test and a single exiting block. We'd have to handle the fact + // that not every instruction executes on the last iteration. This will + // require a lane mask which varies through the vector loop body. (TODO) + if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { + LLVM_DEBUG( + dbgs() + << "LV: Cannot fold tail by masking. Requires a singe latch exit\n"); + return false; + } LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 55cc801e91452..a0f239f00f106 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3987,22 +3987,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { break; } - // The only loops we can vectorize without a scalar epilogue, are loops with - // a bottom-test and a single exiting block. We'd have to handle the fact - // that not every instruction executes on the last iteration. This will - // require a lane mask which varies through the vector loop body. (TODO) - if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { - // If there was a tail-folding hint/switch, but we can't fold the tail by - // masking, fallback to a vectorization with a scalar epilogue. - if (ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate) { - LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a " - "scalar epilogue instead.\n"); - ScalarEpilogueStatus = CM_ScalarEpilogueAllowed; - return computeFeasibleMaxVF(MaxTC, UserVF, false); - } - return FixedScalableVFPair::getNone(); - } - // Now try the tail folding // Invalidate interleave groups that require an epilogue if we can't mask @@ -4032,14 +4016,19 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { } auto NoScalarEpilogueNeeded = [this, &UserIC](unsigned MaxVF) { + // Return false if the loop is neither a single-latch-exit loop nor an + // early-exit loop as tail-folding is not supported in that case. + if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch() && + !Legal->hasUncountableEarlyExit()) + return false; unsigned MaxVFtimesIC = UserIC ? MaxVF * UserIC : MaxVF; ScalarEvolution *SE = PSE.getSE(); - // Currently only loops with countable exits are vectorized, but calling - // getSymbolicMaxBackedgeTakenCount allows enablement work for loops with - // uncountable exits whilst also ensuring the symbolic maximum and known - // back-edge taken count remain identical for loops with countable exits. + // Calling getSymbolicMaxBackedgeTakenCount enables support for loops + // with uncountable exits. For countable loops, the symbolic maximum must + // remain identical to the known back-edge taken count. const SCEV *BackedgeTakenCount = PSE.getSymbolicMaxBackedgeTakenCount(); - assert(BackedgeTakenCount == PSE.getBackedgeTakenCount() && + assert((Legal->hasUncountableEarlyExit() || + BackedgeTakenCount == PSE.getBackedgeTakenCount()) && "Invalid loop count"); const SCEV *ExitCount = SE->getAddExpr( BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType()));