diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index da4dc4881db53..a0edd296caab8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -182,11 +182,6 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, "Instruction shouldn't have been visited."); if (auto *Br = dyn_cast(Inst)) { - if (TheLoop->getLoopLatch() == BB || - any_of(successors(BB), - [this](BasicBlock *Succ) { return !TheLoop->contains(Succ); })) - continue; - // Conditional branch instruction are represented using BranchOnCond // recipes. if (Br->isConditional()) { @@ -251,6 +246,8 @@ std::unique_ptr PlainCFGBuilder::buildPlainCFG( DenseMap &VPB2IRBB) { VPIRBasicBlock *Entry = cast(Plan->getEntry()); BB2VPBB[Entry->getIRBasicBlock()] = Entry; + for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks()) + BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB; // 1. Scan the body of the loop in a topological order to visit each basic // block after having visited its predecessor basic blocks. Create a VPBB for @@ -276,7 +273,6 @@ std::unique_ptr PlainCFGBuilder::buildPlainCFG( for (BasicBlock *BB : RPO) { // Create or retrieve the VPBasicBlock for this BB. VPBasicBlock *VPBB = getOrCreateVPBB(BB); - Loop *LoopForBB = LI->getLoopFor(BB); // Set VPBB predecessors in the same order as they are in the incoming BB. setVPBBPredsFromBB(VPBB, BB); @@ -307,24 +303,12 @@ std::unique_ptr PlainCFGBuilder::buildPlainCFG( BasicBlock *IRSucc1 = BI->getSuccessor(1); VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0); VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1); - - // Don't connect any blocks outside the current loop except the latches for - // inner loops. - // TODO: Also connect exit blocks during initial VPlan construction. - if (LoopForBB == TheLoop || BB != LoopForBB->getLoopLatch()) { - if (!LoopForBB->contains(IRSucc0)) { - VPBB->setOneSuccessor(Successor1); - continue; - } - if (!LoopForBB->contains(IRSucc1)) { - VPBB->setOneSuccessor(Successor0); - continue; - } - } - VPBB->setTwoSuccessors(Successor0, Successor1); } + for (auto *EB : Plan->getExitBlocks()) + setVPBBPredsFromBB(EB, EB->getIRBasicBlock()); + // 2. The whole CFG has been built at this point so all the input Values must // have a VPlan counterpart. Fix VPlan header phi by adding their // corresponding VPlan operands. @@ -424,22 +408,23 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) { VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB); VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB); - VPBlockBase *Succ = LatchVPBB->getSingleSuccessor(); - assert(LatchVPBB->getNumSuccessors() <= 1 && - "Latch has more than one successor"); - if (Succ) - VPBlockUtils::disconnectBlocks(LatchVPBB, Succ); - - auto *R = Plan.createVPRegionBlock(HeaderVPB, LatchVPBB, "", - false /*isReplicator*/); - // All VPBB's reachable shallowly from HeaderVPB belong to top level loop, - // because VPlan is expected to end at top level latch disconnected above. + VPBlockBase *LatchExitVPB = LatchVPBB->getSingleSuccessor(); + assert(LatchExitVPB && "Latch expected to be left with a single successor"); + + // Create an empty region first and insert it between PreheaderVPBB and + // LatchExitVPB, taking care to preserve the original predecessor & successor + // order of blocks. Set region entry and exiting after both HeaderVPB and + // LatchVPBB have been disconnected from their predecessors/successors. + auto *R = Plan.createVPRegionBlock("", false /*isReplicator*/); + VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R); + VPBlockUtils::disconnectBlocks(LatchVPBB, R); + VPBlockUtils::connectBlocks(PreheaderVPBB, R); + R->setEntry(HeaderVPB); + R->setExiting(LatchVPBB); + + // All VPBB's reachable shallowly from HeaderVPB belong to the current region. for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB)) VPBB->setParent(R); - - VPBlockUtils::insertBlockAfter(R, PreheaderVPBB); - if (Succ) - VPBlockUtils::connectBlocks(R, Succ); } // Add the necessary canonical IV and branch recipes required to control the @@ -491,12 +476,34 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy, VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry()); VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block"); - VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB); - LatchVPB->swapSuccessors(); + // The canonical LatchVPB has the header block as last successor. If it has + // another successor, this successor is an exit block - insert middle block on + // its edge. Otherwise, add middle block as another successor retaining header + // as last. + if (LatchVPB->getNumSuccessors() == 2) { + VPBlockBase *LatchExitVPB = LatchVPB->getSuccessors()[0]; + VPBlockUtils::insertOnEdge(LatchVPB, LatchExitVPB, MiddleVPBB); + } else { + VPBlockUtils::connectBlocks(LatchVPB, MiddleVPBB); + LatchVPB->swapSuccessors(); + } addCanonicalIVRecipes(Plan, cast(HeaderVPB), cast(LatchVPB), InductionTy, IVDL); + // Disconnect all edges to exit blocks other than from the middle block. + // TODO: VPlans with early exits should be explicitly converted to a form + // exiting only via the latch here, including adjusting the exit condition, + // instead of simply disconnecting the edges and adjusting the VPlan later. + for (VPBlockBase *EB : Plan.getExitBlocks()) { + for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) { + if (Pred == MiddleVPBB) + continue; + cast(Pred)->getTerminator()->eraseFromParent(); + VPBlockUtils::disconnectBlocks(Pred, EB); + } + } + // Create SCEV and VPValue for the trip count. // We use the symbolic max backedge-taken-count, which works also when // vectorizing loops with uncountable early exits. @@ -523,6 +530,8 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy, // 3) Otherwise, construct a runtime check. if (!RequiresScalarEpilogueCheck) { + if (auto *LatchExitVPB = MiddleVPBB->getSingleSuccessor()) + VPBlockUtils::disconnectBlocks(MiddleVPBB, LatchExitVPB); VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); // The exit blocks are unreachable, remove their recipes to make sure no // users remain that may pessimize transforms. @@ -533,10 +542,8 @@ void VPlanTransforms::prepareForVectorization(VPlan &Plan, Type *InductionTy, return; } - // The connection order corresponds to the operands of the conditional branch. - BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock(); - auto *VPExitBlock = Plan.getExitBlock(IRExitBlock); - VPBlockUtils::connectBlocks(MiddleVPBB, VPExitBlock); + // The connection order corresponds to the operands of the conditional branch, + // with the middle block already connected to the exit block. VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator(); diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll index 91a5ea6b7fe36..fe845ae74cbee 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -31,7 +31,11 @@ define void @foo(i64 %n) { ; CHECK-NEXT: outer.latch: ; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1> ; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8> -; CHECK-NEXT: Successor(s): outer.header +; CHECK-NEXT: EMIT branch-on-cond ir<%outer.ec> +; CHECK-NEXT: Successor(s): ir-bb, outer.header +; CHECK-EMPTY: +; CHECK-NEXT: ir-bb: +; CHECK-NEXT: No successors ; CHECK-NEXT: } entry: br label %outer.header diff --git a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp index dbed67a03fdeb..f33e9615d4176 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -51,7 +51,7 @@ TEST_F(VPlanHCFGTest, testBuildHCFGInnerLoop) { // Check that the region following the preheader consists of a block for the // original header and a separate latch. VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock(); - EXPECT_EQ(10u, VecBB->size()); + EXPECT_EQ(11u, VecBB->size()); EXPECT_EQ(0u, VecBB->getNumPredecessors()); EXPECT_EQ(0u, VecBB->getNumSuccessors()); EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB); @@ -129,6 +129,7 @@ compound=true " EMIT store ir\<%res\>, ir\<%arr.idx\>\l" + " EMIT ir\<%indvars.iv.next\> = add ir\<%indvars.iv\>, ir\<1\>\l" + " EMIT ir\<%exitcond\> = icmp ir\<%indvars.iv.next\>, ir\<%N\>\l" + + " EMIT vp\<%3\> = not ir\<%exitcond\>\l" + " EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" + " EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" + "No successors\l" @@ -212,7 +213,7 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) { // Check that the region following the preheader consists of a block for the // original header and a separate latch. VPBasicBlock *VecBB = Plan->getVectorLoopRegion()->getEntryBasicBlock(); - EXPECT_EQ(11u, VecBB->size()); + EXPECT_EQ(12u, VecBB->size()); EXPECT_EQ(0u, VecBB->getNumPredecessors()); EXPECT_EQ(0u, VecBB->getNumSuccessors()); EXPECT_EQ(VecBB->getParent()->getEntryBasicBlock(), VecBB); @@ -229,6 +230,7 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) { EXPECT_NE(nullptr, dyn_cast(&*Iter++)); EXPECT_NE(nullptr, dyn_cast(&*Iter++)); EXPECT_NE(nullptr, dyn_cast(&*Iter++)); + EXPECT_NE(nullptr, dyn_cast(&*Iter++)); EXPECT_EQ(VecBB->end(), Iter); } @@ -302,6 +304,7 @@ compound=true " EMIT store ir\<%res\>, ir\<%arr.idx\>\l" + " EMIT ir\<%iv.next\> = add ir\<%iv\>, ir\<1\>\l" + " EMIT ir\<%exitcond\> = icmp ir\<%iv.next\>, ir\<%N\>\l" + + " EMIT vp\<%3\> = not ir\<%exitcond\>\l" + " EMIT vp\<%index.next\> = add nuw vp\<%2\>, vp\<%0\>\l" + " EMIT branch-on-count vp\<%index.next\>, vp\<%1\>\l" + "No successors\l"