diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 316468651df1a..5da8c32051960 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -628,7 +628,11 @@ const VPRecipeBase *VPBasicBlock::getTerminator() const { } bool VPBasicBlock::isExiting() const { - return getParent() && getParent()->getExitingBasicBlock() == this; + const VPRegionBlock *VPRB = getParent(); + if (!VPRB) + return false; + return VPRB->getExitingBasicBlock() == this || + VPRB->getEarlyExiting() == this; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -989,6 +993,9 @@ static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) { /// Assumes a single pre-header basic-block was created for this. Introduce /// additional basic-blocks as needed, and fill them all. void VPlan::execute(VPTransformState *State) { + assert(!getVectorLoopRegion()->getEarlyExiting() && + "Executing vplans with an early exit not yet supported"); + // Initialize CFG state. State->CFG.PrevVPBB = nullptr; State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor(); @@ -1007,6 +1014,7 @@ void VPlan::execute(VPTransformState *State) { BasicBlock *MiddleBB = State->CFG.ExitBB; VPBasicBlock *MiddleVPBB = cast(getVectorLoopRegion()->getSingleSuccessor()); + // Find the VPBB for the scalar preheader, relying on the current structure // when creating the middle block and its successrs: if there's a single // predecessor, it must be the scalar preheader. Otherwise, the second diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 64242e43c56bc..8c63156b042f3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3236,21 +3236,33 @@ class VPIRBasicBlock : public VPBasicBlock { }; /// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks -/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG. +/// which form a Single-Entry-Single-Exiting or Single-Entry-Multiple-Exiting +/// subgraph of the output IR CFG. For the multiple-exiting case only a total +/// of two exits are currently supported and the early exit is tracked +/// separately. The first successor should always correspond to the normal +/// exiting block, i.e. vector latch -> middle.block. An optional second +/// successor corresponds to the early exit. /// A VPRegionBlock may indicate that its contents are to be replicated several /// times. This is designed to support predicated scalarization, in which a /// scalar if-then code structure needs to be generated VF * UF times. Having /// this replication indicator helps to keep a single model for multiple /// candidate VF's. The actual replication takes place only once the desired VF /// and UF have been determined. +/// TODO: The SEME case is a work in progress and any attempt to execute a +/// VPlan containing a region with multiple exits will assert. class VPRegionBlock : public VPBlockBase { - /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock. + /// Hold the Single Entry of the SESE/SEME region modelled by the + /// VPRegionBlock. VPBlockBase *Entry; - /// Hold the Single Exiting block of the SESE region modelled by the + /// Hold the normal Exiting block of the SESE/SEME region modelled by the /// VPRegionBlock. VPBlockBase *Exiting; + /// Hold the Early Exiting block of the SEME region. If this is a SESE region + /// this value should be nullptr. + VPBlockBase *EarlyExiting; + /// An indicator whether this region is to generate multiple replicated /// instances of output IR corresponding to its VPBlockBases. bool IsReplicator; @@ -3259,7 +3271,7 @@ class VPRegionBlock : public VPBlockBase { VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name = "", bool IsReplicator = false) : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting), - IsReplicator(IsReplicator) { + EarlyExiting(nullptr), IsReplicator(IsReplicator) { assert(Entry->getPredecessors().empty() && "Entry block has predecessors."); assert(Exiting->getSuccessors().empty() && "Exit block has successors."); Entry->setParent(this); @@ -3267,7 +3279,7 @@ class VPRegionBlock : public VPBlockBase { } VPRegionBlock(const std::string &Name = "", bool IsReplicator = false) : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr), - IsReplicator(IsReplicator) {} + EarlyExiting(nullptr), IsReplicator(IsReplicator) {} ~VPRegionBlock() override { if (Entry) { @@ -3297,8 +3309,8 @@ class VPRegionBlock : public VPBlockBase { const VPBlockBase *getExiting() const { return Exiting; } VPBlockBase *getExiting() { return Exiting; } - /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p - /// ExitingBlock must have no successors. + /// Set \p ExitingBlock as the normal exiting VPBlockBase of this + /// VPRegionBlock. \p ExitingBlock must have no successors. void setExiting(VPBlockBase *ExitingBlock) { assert(ExitingBlock->getSuccessors().empty() && "Exit block cannot have successors."); @@ -3306,6 +3318,24 @@ class VPRegionBlock : public VPBlockBase { ExitingBlock->setParent(this); } + /// Set \p EarlyExitingBlock as the early exiting VPBlockBase of this + /// VPRegionBlock. \p EarlyExitingBlock must have a successor, since + /// it cannot be the latch. + void setEarlyExiting(VPBlockBase *EarlyExitingBlock) { + assert(EarlyExitingBlock->getNumSuccessors() == 1 && + "Early exit block must have a successor."); + assert(EarlyExitingBlock->getParent() == this && + "Early exit block should already be in loop region"); + EarlyExiting = EarlyExitingBlock; + } + + const VPBlockBase *getEarlyExiting() const { return EarlyExiting; } + VPBlockBase *getEarlyExiting() { return EarlyExiting; } + + /// Return the number of exiting blocks from this region. It should match + /// the number of successors. + unsigned getNumExitingBlocks() const { return EarlyExiting ? 2 : 1; } + /// Returns the pre-header VPBasicBlock of the loop region. VPBasicBlock *getPreheaderVPBB() { assert(!isReplicator() && "should only get pre-header of loop regions"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index dfddb5b45f623..2f394eeaa544e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -186,7 +186,9 @@ static bool hasDuplicates(const SmallVectorImpl &VPBlockVec) { bool VPlanVerifier::verifyBlock(const VPBlockBase *VPB) { auto *VPBB = dyn_cast(VPB); // Check block's condition bit. - if (VPB->getNumSuccessors() > 1 || + // NOTE: A VPRegionBlock can legally have multiple successors due to + // early exits from the loop. + if ((VPB->getNumSuccessors() > 1 && !isa(VPB)) || (VPBB && VPBB->getParent() && VPBB->isExiting() && !VPBB->getParent()->isReplicator())) { if (!VPBB || !VPBB->getTerminator()) { @@ -210,6 +212,19 @@ bool VPlanVerifier::verifyBlock(const VPBlockBase *VPB) { return false; } + // If this is a loop region with multiple successors it must have as many + // exiting blocks as successors, even if the original scalar loop only had a + // single exit block. That's because in the vector loop we always create a + // middle block for the vector latch exit, which is distinct from the early + // exit. + auto *VPRB = dyn_cast(VPB); + if (VPRB && VPRB->getNumExitingBlocks() != VPB->getNumSuccessors()) { + errs() << "Number of exiting blocks (" << VPRB->getNumExitingBlocks() + << ") does not match number of successors (" + << VPB->getNumSuccessors() << ")!\n"; + return false; + } + for (const VPBlockBase *Succ : Successors) { // There must be a bi-directional link between block and successor. const auto &SuccPreds = Succ->getPredecessors(); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp index 9958d6ea124f8..95525484a3f4a 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp @@ -8,6 +8,7 @@ #include "../lib/Transforms/Vectorize/VPlanVerifier.h" #include "../lib/Transforms/Vectorize/VPlan.h" +#include "../lib/Transforms/Vectorize/VPlanTransforms.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "gtest/gtest.h" @@ -28,6 +29,10 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefSameBB) { VPBasicBlock *VPBB2 = new VPBasicBlock(); VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1"); VPBlockUtils::connectBlocks(VPBB1, R1); + + VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block"); + VPBlockUtils::connectBlocks(R1, VPMIDDLE); + VPlan Plan(VPPH, &*TC, VPBB1); #if GTEST_HAS_STREAM_REDIRECTION @@ -59,6 +64,9 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) { VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1"); VPBlockUtils::connectBlocks(VPBB1, R1); + VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block"); + VPBlockUtils::connectBlocks(R1, VPMIDDLE); + auto TC = std::make_unique(); VPlan Plan(VPPH, &*TC, VPBB1); @@ -102,6 +110,9 @@ TEST(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) { VPBlockUtils::connectBlocks(VPBB1, R1); VPBB3->setParent(R1); + VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block"); + VPBlockUtils::connectBlocks(R1, VPMIDDLE); + auto TC = std::make_unique(); VPlan Plan(VPPH, &*TC, VPBB1); @@ -138,6 +149,9 @@ TEST(VPVerifierTest, DuplicateSuccessorsOutsideRegion) { VPBlockUtils::connectBlocks(VPBB1, R1); VPBlockUtils::connectBlocks(VPBB1, R1); + VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block"); + VPBlockUtils::connectBlocks(R1, VPMIDDLE); + auto TC = std::make_unique(); VPlan Plan(VPPH, &*TC, VPBB1); @@ -175,6 +189,9 @@ TEST(VPVerifierTest, DuplicateSuccessorsInsideRegion) { VPBlockUtils::connectBlocks(VPBB1, R1); VPBB3->setParent(R1); + VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block"); + VPBlockUtils::connectBlocks(R1, VPMIDDLE); + auto TC = std::make_unique(); VPlan Plan(VPPH, &*TC, VPBB1); @@ -204,6 +221,9 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) { VPBlockUtils::connectBlocks(VPBB1, R1); VPBB1->setParent(R1); + VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block"); + VPBlockUtils::connectBlocks(R1, VPMIDDLE); + auto TC = std::make_unique(); VPlan Plan(VPPH, &*TC, VPBB1); @@ -217,4 +237,117 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) { #endif } +TEST(VPVerifierTest, LoopRegionMultipleSuccessors1) { + VPInstruction *TC = new VPInstruction(Instruction::Add, {}); + VPBasicBlock *VPBBPH = new VPBasicBlock("preheader"); + VPBBPH->appendRecipe(TC); + + VPInstruction *TC2 = new VPInstruction(Instruction::Add, {}); + VPBasicBlock *VPBBENTRY = new VPBasicBlock("entry"); + VPBBENTRY->appendRecipe(TC2); + + // Add a VPCanonicalIVPHIRecipe starting at 0 to the header. + auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(TC2, {}); + VPInstruction *I1 = new VPInstruction(Instruction::Add, {}); + VPInstruction *I2 = new VPInstruction(Instruction::Sub, {I1}); + VPInstruction *I3 = new VPInstruction(VPInstruction::BranchOnCond, {I1}); + + VPBasicBlock *RBB1 = new VPBasicBlock(); + RBB1->appendRecipe(CanonicalIVPHI); + RBB1->appendRecipe(I1); + RBB1->appendRecipe(I2); + RBB1->appendRecipe(I3); + RBB1->setName("bb1"); + + VPInstruction *I4 = new VPInstruction(Instruction::Mul, {I2, I1}); + VPInstruction *I5 = new VPInstruction(VPInstruction::BranchOnCond, {I4}); + VPBasicBlock *RBB2 = new VPBasicBlock(); + RBB2->appendRecipe(I4); + RBB2->appendRecipe(I5); + RBB2->setName("bb2"); + + VPRegionBlock *R1 = new VPRegionBlock(RBB1, RBB2, "R1"); + VPBlockUtils::connectBlocks(RBB1, RBB2); + VPBlockUtils::connectBlocks(VPBBENTRY, R1); + + VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block"); + VPBasicBlock *VPEARLYEXIT = new VPBasicBlock("early.exit"); + VPBlockUtils::connectBlocks(R1, VPMIDDLE); + VPBlockUtils::connectBlocks(R1, VPEARLYEXIT); + + VPlan Plan(VPBBPH, TC, VPBBENTRY); + Plan.setName("TestPlan"); + Plan.addVF(ElementCount::getFixed(4)); + Plan.getVectorLoopRegion()->setEarlyExiting(RBB1); + + EXPECT_TRUE(verifyVPlanIsValid(Plan)); +} + +TEST(VPVerifierTest, LoopRegionMultipleSuccessors2) { + VPInstruction *TC = new VPInstruction(Instruction::Add, {}); + VPBasicBlock *VPBBPH = new VPBasicBlock("preheader"); + VPBBPH->appendRecipe(TC); + + VPInstruction *TC2 = new VPInstruction(Instruction::Add, {}); + VPBasicBlock *VPBBENTRY = new VPBasicBlock("entry"); + VPBBENTRY->appendRecipe(TC2); + + // We can't create a live-in without a VPlan, but we can't create + // a VPlan without the blocks. So we initialize this to a silly + // value here, then fix it up later. + auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(TC2, {}); + VPInstruction *I1 = new VPInstruction(Instruction::Add, {}); + VPInstruction *I2 = new VPInstruction(Instruction::Sub, {I1}); + VPInstruction *I3 = new VPInstruction(VPInstruction::BranchOnCond, {I1}); + + VPBasicBlock *RBB1 = new VPBasicBlock(); + RBB1->appendRecipe(CanonicalIVPHI); + RBB1->appendRecipe(I1); + RBB1->appendRecipe(I2); + RBB1->appendRecipe(I3); + RBB1->setName("vector.body"); + + // This really is what the vplan cfg looks like before optimising! + VPBasicBlock *RBB2 = new VPBasicBlock(); + RBB2->setName("loop.inc"); + // A block that inherits the latch name from the original scalar loop. + + VPBasicBlock *RBB3 = new VPBasicBlock(); + // No name + + VPInstruction *I4 = new VPInstruction(Instruction::Mul, {I2, I1}); + VPInstruction *I5 = new VPInstruction(VPInstruction::BranchOnCond, {I4}); + VPBasicBlock *RBB4 = new VPBasicBlock(); + RBB4->appendRecipe(I4); + RBB4->appendRecipe(I5); + RBB4->setName("vector.latch"); + + VPRegionBlock *R1 = new VPRegionBlock(RBB1, RBB4, "R1"); + VPBlockUtils::insertBlockAfter(RBB2, RBB1); + VPBlockUtils::insertBlockAfter(RBB3, RBB2); + VPBlockUtils::insertBlockAfter(RBB4, RBB3); + VPBlockUtils::connectBlocks(VPBBENTRY, R1); + + VPBasicBlock *VPMIDDLE = new VPBasicBlock("middle.block"); + VPBasicBlock *VPEARLYEXIT = new VPBasicBlock("early.exit"); + VPBlockUtils::connectBlocks(R1, VPMIDDLE); + VPBlockUtils::connectBlocks(R1, VPEARLYEXIT); + + VPlan Plan(VPBBPH, TC, VPBBENTRY); + Plan.setName("TestPlan"); + Plan.addVF(ElementCount::getFixed(4)); + Plan.getVectorLoopRegion()->setEarlyExiting(RBB1); + + // Update the VPCanonicalIVPHIRecipe to have a live-in IR value. + LLVMContext C; + IntegerType *Int32 = IntegerType::get(C, 32); + Value *StartIV = PoisonValue::get(Int32); + CanonicalIVPHI->setStartValue(Plan.getOrAddLiveIn(StartIV)); + + EXPECT_TRUE(verifyVPlanIsValid(Plan)); + + VPlanTransforms::optimize(Plan, C); + EXPECT_TRUE(verifyVPlanIsValid(Plan)); +} + } // namespace