diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index af94dc01c8c5c..dd7f05465a50b 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9544,14 +9544,14 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { Range); auto Plan = std::make_unique(OrigLoop); // Build hierarchical CFG. - // Convert to VPlan-transform and consoliate all transforms for VPlan + // TODO: Convert to VPlan-transform and consolidate all transforms for VPlan // creation. VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); - HCFGBuilder.buildHierarchicalCFG(); + HCFGBuilder.buildPlainCFG(); - VPlanTransforms::introduceTopLevelVectorLoopRegion( - *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck, - CM.foldTailByMasking(), OrigLoop); + VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(), + PSE, RequiresScalarEpilogueCheck, + CM.foldTailByMasking(), OrigLoop); // Don't use getDecisionAndClampRange here, because we don't know the UF // so this function is better to be conservative, rather than to split @@ -9851,10 +9851,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { auto Plan = std::make_unique(OrigLoop); // Build hierarchical CFG VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); - HCFGBuilder.buildHierarchicalCFG(); + HCFGBuilder.buildPlainCFG(); - VPlanTransforms::introduceTopLevelVectorLoopRegion( - *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop); + VPlanTransforms::createLoopRegions(*Plan, Legal->getWidestInductionType(), + PSE, true, false, OrigLoop); for (ElementCount VF : Range) Plan->addVF(VF); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 94b5167c60089..7084676af6d5b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -57,6 +57,7 @@ class SCEV; class Type; 
class VPBasicBlock; class VPBuilder; +class VPDominatorTree; class VPRegionBlock; class VPlan; class VPLane; @@ -303,6 +304,13 @@ class VPBlockBase { /// Remove all the successors of this block. void clearSuccessors() { Successors.clear(); } + /// Swap predecessors of the block. The block must have exactly 2 + /// predecessors. + void swapPredecessors() { + assert(Predecessors.size() == 2 && "must have 2 predecessors to swap"); + std::swap(Predecessors[0], Predecessors[1]); + } + /// Swap successors of the block. The block must have exactly 2 successors. // TODO: This should be part of introducing conditional branch recipes rather // than being independent. diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index f58f0290b5fa9..1e687d0879f18 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -14,26 +14,88 @@ #include "LoopVectorizationPlanner.h" #include "VPlan.h" #include "VPlanCFG.h" +#include "VPlanDominatorTree.h" #include "VPlanTransforms.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" using namespace llvm; -void VPlanTransforms::introduceTopLevelVectorLoopRegion( - VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, - bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) { - // TODO: Generalize to introduce all loop regions. - auto *HeaderVPBB = cast(Plan.getEntry()->getSingleSuccessor()); - VPBlockUtils::disconnectBlocks(Plan.getEntry(), HeaderVPBB); +/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it +/// has exactly 2 predecessors (preheader and latch), where the block +/// dominates the latch and the preheader dominates the block. If it is a +/// header block return true, making sure the preheader appears first and +/// the latch second. Otherwise return false. 
+static bool canonicalHeader(VPBlockBase *HeaderVPB, + const VPDominatorTree &VPDT) { + ArrayRef Preds = HeaderVPB->getPredecessors(); + if (Preds.size() != 2) + return false; - VPBasicBlock *OriginalLatch = - cast(HeaderVPBB->getSinglePredecessor()); - VPBlockUtils::disconnectBlocks(OriginalLatch, HeaderVPBB); - VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph"); - VPBlockUtils::connectBlocks(Plan.getEntry(), VecPreheader); - assert(OriginalLatch->getNumSuccessors() == 0 && - "Plan should end at top level latch"); + auto *PreheaderVPBB = Preds[0]; + auto *LatchVPBB = Preds[1]; + if (VPDT.dominates(PreheaderVPBB, HeaderVPB) && + VPDT.dominates(HeaderVPB, LatchVPBB)) + return true; + + std::swap(PreheaderVPBB, LatchVPBB); + + if (VPDT.dominates(PreheaderVPBB, HeaderVPB) && + VPDT.dominates(HeaderVPB, LatchVPBB)) { + // Canonicalize predecessors of header so that preheader is first and latch + // second. + HeaderVPB->swapPredecessors(); + for (VPRecipeBase &R : cast(HeaderVPB)->phis()) + R.swapOperands(); + return true; + } + + return false; +} + +/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB. +static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) { + auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0]; + auto *LatchVPBB = HeaderVPB->getPredecessors()[1]; + + VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB); + VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB); + VPBlockBase *Succ = LatchVPBB->getSingleSuccessor(); + assert(LatchVPBB->getNumSuccessors() <= 1 && + "Latch has more than one successor"); + if (Succ) + VPBlockUtils::disconnectBlocks(LatchVPBB, Succ); + + auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "", + false /*isReplicator*/); + R->setParent(HeaderVPB->getParent()); + // All VPBB's reachable shallowly from HeaderVPB belong to top level loop, + // because VPlan is expected to end at top level latch disconnected above. 
+ for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB)) + VPBB->setParent(R); + + VPBlockUtils::insertBlockAfter(R, PreheaderVPBB); + if (Succ) + VPBlockUtils::connectBlocks(R, Succ); +} + +void VPlanTransforms::createLoopRegions(VPlan &Plan, Type *InductionTy, + PredicatedScalarEvolution &PSE, + bool RequiresScalarEpilogueCheck, + bool TailFolded, Loop *TheLoop) { + VPDominatorTree VPDT; + VPDT.recalculate(Plan); + for (VPBlockBase *HeaderVPB : vp_depth_first_shallow(Plan.getEntry())) + if (canonicalHeader(HeaderVPB, VPDT)) + createLoopRegion(Plan, HeaderVPB); + + VPRegionBlock *TopRegion = Plan.getVectorLoopRegion(); + auto *OrigExiting = TopRegion->getExiting(); + VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch"); + VPBlockUtils::insertBlockAfter(LatchVPBB, OrigExiting); + TopRegion->setExiting(LatchVPBB); + TopRegion->setName("vector loop"); + TopRegion->getEntryBasicBlock()->setName("vector.body"); // Create SCEV and VPValue for the trip count. // We use the symbolic max backedge-taken-count, which works also when @@ -47,18 +109,9 @@ void VPlanTransforms::introduceTopLevelVectorLoopRegion( Plan.setTripCount( vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE)); - // Create VPRegionBlock, with existing header and new empty latch block, to be - // filled. - VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch"); - VPBlockUtils::insertBlockAfter(LatchVPBB, OriginalLatch); - auto *TopRegion = Plan.createVPRegionBlock( - HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/); - // All VPBB's reachable shallowly from HeaderVPBB belong to top level loop, - // because VPlan is expected to end at top level latch. 
- for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB)) - VPBB->setParent(TopRegion); - - VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader); + VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph"); + VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry()); + VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block"); VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index 4b8a2420b3037..5bacd2d4e6d88 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -12,9 +12,7 @@ /// components and steps: // /// 1. PlainCFGBuilder class: builds a plain VPBasicBlock-based CFG that -/// faithfully represents the CFG in the incoming IR. A VPRegionBlock (Top -/// Region) is created to enclose and serve as parent of all the VPBasicBlocks -/// in the plain CFG. +/// faithfully represents the CFG in the incoming IR. /// NOTE: At this point, there is a direct correspondence between all the /// VPBasicBlocks created for the initial plain CFG and the incoming /// BasicBlocks. However, this might change in the future. @@ -57,12 +55,8 @@ class PlainCFGBuilder { // Hold phi node's that need to be fixed once the plain CFG has been built. SmallVector PhisToFix; - /// Maps loops in the original IR to their corresponding region. - DenseMap Loop2Region; - // Utility functions. void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB); - void setRegionPredsFromBB(VPRegionBlock *VPBB, BasicBlock *BB); void fixHeaderPhis(); VPBasicBlock *getOrCreateVPBB(BasicBlock *BB); #ifndef NDEBUG @@ -83,25 +77,6 @@ class PlainCFGBuilder { // Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB // must have no predecessors. 
void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) { - auto GetLatchOfExit = [this](BasicBlock *BB) -> BasicBlock * { - auto *SinglePred = BB->getSinglePredecessor(); - Loop *LoopForBB = LI->getLoopFor(BB); - if (!SinglePred || LI->getLoopFor(SinglePred) == LoopForBB) - return nullptr; - // The input IR must be in loop-simplify form, ensuring a single predecessor - // for exit blocks. - assert(SinglePred == LI->getLoopFor(SinglePred)->getLoopLatch() && - "SinglePred must be the only loop latch"); - return SinglePred; - }; - if (auto *LatchBB = GetLatchOfExit(BB)) { - auto *PredRegion = getOrCreateVPBB(LatchBB)->getParent(); - assert(VPBB == cast(PredRegion->getSingleSuccessor()) && - "successor must already be set for PredRegion; it must have VPBB " - "as single successor"); - VPBB->setPredecessors({PredRegion}); - return; - } // Collect VPBB predecessors. SmallVector VPBBPreds; for (BasicBlock *Pred : predecessors(BB)) @@ -113,13 +88,6 @@ static bool isHeaderBB(BasicBlock *BB, Loop *L) { return L && BB == L->getHeader(); } -void PlainCFGBuilder::setRegionPredsFromBB(VPRegionBlock *Region, - BasicBlock *BB) { - // BB is a loop header block. Connect the region to the loop preheader. - Loop *LoopOfBB = LI->getLoopFor(BB); - Region->setPredecessors({getOrCreateVPBB(LoopOfBB->getLoopPredecessor())}); -} - // Add operands to VPInstructions representing phi nodes from the input IR. void PlainCFGBuilder::fixHeaderPhis() { for (auto *Phi : PhisToFix) { @@ -130,43 +98,18 @@ void PlainCFGBuilder::fixHeaderPhis() { auto *VPPhi = cast(VPVal); assert(VPPhi->getNumOperands() == 0 && "Expected VPInstruction with no operands."); - - Loop *L = LI->getLoopFor(Phi->getParent()); - assert(isHeaderBB(Phi->getParent(), L)); - // For header phis, make sure the incoming value from the loop - // predecessor is the first operand of the recipe. 
+ assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) && + "Expected Phi in header block."); assert(Phi->getNumOperands() == 2 && "header phi must have exactly 2 operands"); - BasicBlock *LoopPred = L->getLoopPredecessor(); - VPPhi->addOperand( - getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopPred))); - BasicBlock *LoopLatch = L->getLoopLatch(); - VPPhi->addOperand( - getOrCreateVPOperand(Phi->getIncomingValueForBlock(LoopLatch))); - } -} - -static bool isHeaderVPBB(VPBasicBlock *VPBB) { - return VPBB->getParent() && VPBB->getParent()->getEntry() == VPBB; -} - -/// Return true of \p L loop is contained within \p OuterLoop. -static bool doesContainLoop(const Loop *L, const Loop *OuterLoop) { - if (L->getLoopDepth() < OuterLoop->getLoopDepth()) - return false; - const Loop *P = L; - while (P) { - if (P == OuterLoop) - return true; - P = P->getParentLoop(); + for (BasicBlock *Pred : predecessors(Phi->getParent())) + VPPhi->addOperand( + getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred))); } - return false; } -// Create a new empty VPBasicBlock for an incoming BasicBlock in the region -// corresponding to the containing loop or retrieve an existing one if it was -// already created. If no region exists yet for the loop containing \p BB, a new -// one is created. +// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an +// existing one if it was already created. VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { if (auto *VPBB = BB2VPBB.lookup(BB)) { // Retrieve existing VPBB. @@ -174,32 +117,10 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { } // Create new VPBB. - StringRef Name = isHeaderBB(BB, TheLoop) ? 
"vector.body" : BB->getName(); + StringRef Name = BB->getName(); LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n"); VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name); BB2VPBB[BB] = VPBB; - - // Get or create a region for the loop containing BB, except for the top - // region of TheLoop which is created later. - Loop *LoopOfBB = LI->getLoopFor(BB); - if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop)) - return VPBB; - - auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB); - if (!isHeaderBB(BB, LoopOfBB)) { - assert(RegionOfVPBB && - "Region should have been created by visiting header earlier"); - VPBB->setParent(RegionOfVPBB); - return VPBB; - } - - assert(!RegionOfVPBB && - "First visit of a header basic block expects to register its region."); - // Handle a header - take care of its Region. - RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/); - RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]); - RegionOfVPBB->setEntry(VPBB); - Loop2Region[LoopOfBB] = RegionOfVPBB; return VPBB; } @@ -351,6 +272,8 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, // Main interface to build the plain CFG. void PlainCFGBuilder::buildPlainCFG( DenseMap &VPB2IRBB) { + VPIRBasicBlock *Entry = cast(Plan.getEntry()); + BB2VPBB[Entry->getIRBasicBlock()] = Entry; // 1. Scan the body of the loop in a topological order to visit each basic // block after having visited its predecessor basic blocks. Create a VPBB for @@ -376,26 +299,13 @@ void PlainCFGBuilder::buildPlainCFG( for (BasicBlock *BB : RPO) { // Create or retrieve the VPBasicBlock for this BB. VPBasicBlock *VPBB = getOrCreateVPBB(BB); - VPRegionBlock *Region = VPBB->getParent(); Loop *LoopForBB = LI->getLoopFor(BB); // Set VPBB predecessors in the same order as they are in the incoming BB. 
- if (!isHeaderBB(BB, LoopForBB)) { - setVPBBPredsFromBB(VPBB, BB); - } else if (Region) { - // BB is a loop header and there's a corresponding region, set the - // predecessor for it. - setRegionPredsFromBB(Region, BB); - } + setVPBBPredsFromBB(VPBB, BB); // Create VPInstructions for BB. createVPInstructionsForVPBB(VPBB, BB); - if (BB == TheLoop->getLoopLatch()) { - VPBasicBlock *HeaderVPBB = getOrCreateVPBB(LoopForBB->getHeader()); - VPBlockUtils::connectBlocks(VPBB, HeaderVPBB); - continue; - } - // Set VPBB successors. We create empty VPBBs for successors if they don't // exist already. Recipes will be created when the successor is visited // during the RPO traversal. @@ -410,10 +320,7 @@ void PlainCFGBuilder::buildPlainCFG( auto *BI = cast(BB->getTerminator()); unsigned NumSuccs = succ_size(BB); if (NumSuccs == 1) { - auto *Successor = getOrCreateVPBB(BB->getSingleSuccessor()); - VPBB->setOneSuccessor(isHeaderVPBB(Successor) - ? Successor->getParent() - : static_cast(Successor)); + VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor())); continue; } assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() && @@ -423,21 +330,11 @@ void PlainCFGBuilder::buildPlainCFG( BasicBlock *IRSucc1 = BI->getSuccessor(1); VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0); VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1); - if (BB == LoopForBB->getLoopLatch()) { - // For a latch we need to set the successor of the region rather than that - // of VPBB and it should be set to the exit, i.e., non-header successor, - // except for the top region, which is handled elsewhere. - assert(LoopForBB != TheLoop && - "Latch of the top region should have been handled earlier"); - Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1 - : Successor0); - Region->setExiting(VPBB); - continue; - } - // Don't connect any blocks outside the current loop except the latch for - // now. The latch is handled above. 
- if (LoopForBB) { + // Don't connect any blocks outside the current loop except the latches for + // inner loops. + // TODO: Also connect exit blocks during initial VPlan construction. + if (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch()) { if (!LoopForBB->contains(IRSucc0)) { VPBB->setOneSuccessor(Successor1); continue; @@ -456,21 +353,16 @@ void PlainCFGBuilder::buildPlainCFG( // corresponding VPlan operands. fixHeaderPhis(); - VPBlockUtils::connectBlocks(Plan.getEntry(), - getOrCreateVPBB(TheLoop->getHeader())); + Plan.getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader())); + Plan.getEntry()->setPlan(&Plan); for (const auto &[IRBB, VPB] : BB2VPBB) VPB2IRBB[VPB] = IRBB; + + LLVM_DEBUG(Plan.setName("Plain CFG\n"); dbgs() << Plan); } void VPlanHCFGBuilder::buildPlainCFG() { PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan); PCFGBuilder.buildPlainCFG(VPB2IRBB); } - -// Public interface to build a H-CFG. -void VPlanHCFGBuilder::buildHierarchicalCFG() { - // Build Top Region enclosing the plain CFG. - buildPlainCFG(); - LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan); -} diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h index f7f98ed7b1755..f2e90d3f4d9b3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h @@ -30,7 +30,6 @@ namespace llvm { class Loop; class LoopInfo; -class VPRegionBlock; class VPlan; class VPlanTestIRBase; class VPBlockBase; @@ -54,15 +53,12 @@ class VPlanHCFGBuilder { /// created for a input IR basic block. DenseMap VPB2IRBB; - /// Build plain CFG for TheLoop and connects it to Plan's entry. - void buildPlainCFG(); - public: VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P) : TheLoop(Lp), LI(LI), Plan(P) {} - /// Build H-CFG for TheLoop and update Plan accordingly. - void buildHierarchicalCFG(); + /// Build plain CFG for TheLoop and connects it to Plan's entry. 
+ void buildPlainCFG(); /// Return the input IR BasicBlock corresponding to \p VPB. Returns nullptr if /// there is no such corresponding block. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index ee3642a8aff73..a9461b261ddb6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -52,20 +52,19 @@ struct VPlanTransforms { verifyVPlanIsValid(Plan); } - /// Introduce the top-level VPRegionBlock for the main loop in \p Plan. Coming - /// into this function, \p Plan's top-level loop is modeled using a plain CFG. - /// This transform wraps the plain CFG of the top-level loop within a - /// VPRegionBlock and creates a VPValue expression for the original trip - /// count. It will also introduce a dedicated VPBasicBlock for the vector - /// pre-header as well a VPBasicBlock as exit block of the region - /// (middle.block). If a check is needed to guard executing the scalar + /// Replace loops in \p Plan's flat CFG with VPRegionBlocks, turning \p Plan's + /// flat CFG into a hierarchical CFG. It also creates a VPValue expression for + /// the original trip count. It will also introduce a dedicated VPBasicBlock + /// for the vector pre-header as well as a VPBasicBlock as exit block of the + /// region (middle.block). If a check is needed to guard executing the scalar /// epilogue loop, it will be added to the middle block, together with /// VPBasicBlocks for the scalar preheader and exit blocks. \p InductionTy is /// the type of the canonical induction and used for related values, like the /// trip count expression. 
- static void introduceTopLevelVectorLoopRegion( - VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, - bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop); + static void createLoopRegions(VPlan &Plan, Type *InductionTy, + PredicatedScalarEvolution &PSE, + bool RequiresScalarEpilogueCheck, + bool TailFolded, Loop *TheLoop); /// Replaces the VPInstructions in \p Plan with corresponding /// widen recipes. Returns false if any VPInstructions could not be converted diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index ced60a30ad56e..638156eab7a84 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -246,6 +246,12 @@ class VPUser { New->addUser(*this); } + /// Swap operands of the VPUser. It must have exactly 2 operands. + void swapOperands() { + assert(Operands.size() == 2 && "must have 2 operands to swap"); + std::swap(Operands[0], Operands[1]); + } + /// Replaces all uses of \p From in the VPUser with \p To. 
void replaceUsesOfWith(VPValue *From, VPValue *To); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll index 625a32c098f94..91a5ea6b7fe36 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -6,35 +6,32 @@ @arr = external global [8 x [8 x i64]], align 16 define void @foo(i64 %n) { -; CHECK: VPlan 'HCFGBuilder: Plain CFG +; CHECK: VPlan 'Plain CFG ; CHECK-NEXT: { ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: Successor(s): vector.body +; CHECK-NEXT: Successor(s): outer.header ; CHECK-EMPTY: -; CHECK-NEXT: vector.body: -; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next> +; CHECK-NEXT: outer.header: +; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<%outer.iv.next>, ir<0> ; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> ; CHECK-NEXT: EMIT store ir<%outer.iv>, ir<%gep.1> ; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n> ; CHECK-NEXT: Successor(s): inner ; CHECK-EMPTY: -; CHECK-NEXT: inner: { -; CHECK-NEXT: inner: -; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next> -; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> -; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2> -; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1> -; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8> -; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec> -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): outer.latch +; CHECK-NEXT: inner: +; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<%inner.iv.next>, ir<0> +; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> +; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2> +; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1> +; CHECK-NEXT: EMIT ir<%inner.ec> = icmp 
ir<%inner.iv.next>, ir<8> +; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec> +; CHECK-NEXT: Successor(s): outer.latch, inner ; CHECK-EMPTY: ; CHECK-NEXT: outer.latch: ; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1> ; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8> -; CHECK-NEXT: Successor(s): vector.body +; CHECK-NEXT: Successor(s): outer.header ; CHECK-NEXT: } entry: br label %outer.header diff --git a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll index 89eaca0cfa8c8..29aeb7c4e97f9 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan_hcfg_stress_test.ll @@ -4,7 +4,7 @@ ; Verify that the stress testing flag for the VPlan H-CFG builder works as ; expected with and without enabling the VPlan H-CFG Verifier. -; CHECK: VPlan 'HCFGBuilder: Plain CFG +; CHECK: VPlan 'Plain CFG target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index caf5d2357411d..486296535996b 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -73,9 +73,9 @@ class VPlanTestIRBase : public testing::Test { PredicatedScalarEvolution PSE(*SE, *L); auto Plan = std::make_unique(L); VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); - HCFGBuilder.buildHierarchicalCFG(); - VPlanTransforms::introduceTopLevelVectorLoopRegion( - *Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L); + HCFGBuilder.buildPlainCFG(); + VPlanTransforms::createLoopRegions(*Plan, IntegerType::get(*Ctx, 64), PSE, + true, false, L); return Plan; } };