diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index f8d08b980edb0..7dac6d0059b26 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_component_library(LLVMVectorize VectorCombine.cpp VPlan.cpp VPlanAnalysis.cpp + VPlanConstruction.cpp VPlanHCFGBuilder.cpp VPlanRecipes.cpp VPlanSLP.cpp diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 74ddf906ff9fd..b987863127994 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9312,14 +9312,17 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { return !CM.requiresScalarEpilogue(VF.isVector()); }, Range); - VPlanPtr Plan = VPlan::createInitialVPlan(Legal->getWidestInductionType(), - PSE, RequiresScalarEpilogueCheck, - CM.foldTailByMasking(), OrigLoop); - + auto Plan = std::make_unique<VPlan>(OrigLoop); // Build hierarchical CFG. + // Convert to VPlan-transform and consolidate all transforms for VPlan + // creation. VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); HCFGBuilder.buildHierarchicalCFG(); + VPlanTransforms::introduceTopLevelVectorLoopRegion( + *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck, + CM.foldTailByMasking(), OrigLoop); + // Don't use getDecisionAndClampRange here, because we don't know the UF // so this function is better to be conservative, rather than to split // it up into different VPlans. 
@@ -9615,13 +9618,14 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) { assert(EnableVPlanNativePath && "VPlan-native path is not enabled."); // Create new empty VPlan - auto Plan = VPlan::createInitialVPlan(Legal->getWidestInductionType(), PSE, - true, false, OrigLoop); - + auto Plan = std::make_unique<VPlan>(OrigLoop); // Build hierarchical CFG VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan); HCFGBuilder.buildHierarchicalCFG(); + VPlanTransforms::introduceTopLevelVectorLoopRegion( + *Plan, Legal->getWidestInductionType(), PSE, true, false, OrigLoop); + for (ElementCount VF : Range) Plan->addVF(VF); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 563784e4af924..944a11b96325d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -880,85 +880,6 @@ VPlan::~VPlan() { delete BackedgeTakenCount; } -VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, - PredicatedScalarEvolution &PSE, - bool RequiresScalarEpilogueCheck, - bool TailFolded, Loop *TheLoop) { - auto Plan = std::make_unique<VPlan>(TheLoop); - VPBlockBase *ScalarHeader = Plan->getScalarHeader(); - - // Connect entry only to vector preheader initially. Entry will also be - // connected to the scalar preheader later, during skeleton creation when - // runtime guards are added as needed. Note that when executing the VPlan for - // an epilogue vector loop, the original entry block here will be replaced by - // a new VPIRBasicBlock wrapping the entry to the epilogue vector loop after - // generating code for the main vector loop. - VPBasicBlock *VecPreheader = Plan->createVPBasicBlock("vector.ph"); - VPBlockUtils::connectBlocks(Plan->getEntry(), VecPreheader); - - // Create SCEV and VPValue for the trip count. - // We use the symbolic max backedge-taken-count, which works also when - // vectorizing loops with uncountable early exits. 
- const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); - assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && - "Invalid loop count"); - ScalarEvolution &SE = *PSE.getSE(); - const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, - InductionTy, TheLoop); - Plan->TripCount = - vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE); - - // Create VPRegionBlock, with empty header and latch blocks, to be filled - // during processing later. - VPBasicBlock *HeaderVPBB = Plan->createVPBasicBlock("vector.body"); - VPBasicBlock *LatchVPBB = Plan->createVPBasicBlock("vector.latch"); - VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); - auto *TopRegion = Plan->createVPRegionBlock( - HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/); - - VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader); - VPBasicBlock *MiddleVPBB = Plan->createVPBasicBlock("middle.block"); - VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); - - VPBasicBlock *ScalarPH = Plan->createVPBasicBlock("scalar.ph"); - VPBlockUtils::connectBlocks(ScalarPH, ScalarHeader); - if (!RequiresScalarEpilogueCheck) { - VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); - return Plan; - } - - // If needed, add a check in the middle block to see if we have completed - // all of the iterations in the first vector loop. Three cases: - // 1) If (N - N%VF) == N, then we *don't* need to run the remainder. - // Thus if tail is to be folded, we know we don't need to run the - // remainder and we can set the condition to true. - // 2) If we require a scalar epilogue, there is no conditional branch as - // we unconditionally branch to the scalar preheader. Do nothing. - // 3) Otherwise, construct a runtime check. - BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock(); - VPIRBasicBlock *VPExitBlock = Plan->getExitBlock(IRExitBlock); - // The connection order corresponds to the operands of the conditional branch. 
- VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); - VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); - - auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator(); - // Here we use the same DebugLoc as the scalar loop latch terminator instead - // of the corresponding compare because they may have ended up with - // different line numbers and we want to avoid awkward line stepping while - // debugging. Eg. if the compare has got a line number inside the loop. - VPBuilder Builder(MiddleVPBB); - VPValue *Cmp = - TailFolded - ? Plan->getOrAddLiveIn(ConstantInt::getTrue( - IntegerType::getInt1Ty(TripCount->getType()->getContext()))) - : Builder.createICmp(CmpInst::ICMP_EQ, Plan->getTripCount(), - &Plan->getVectorTripCount(), - ScalarLatchTerm->getDebugLoc(), "cmp.n"); - Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, - ScalarLatchTerm->getDebugLoc()); - return Plan; -} - void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, VPTransformState &State) { Type *TCTy = TripCountV->getType(); @@ -1135,11 +1056,13 @@ void VPlan::printLiveIns(raw_ostream &O) const { } O << "\n"; - if (TripCount->isLiveIn()) - O << "Live-in "; - TripCount->printAsOperand(O, SlotTracker); - O << " = original trip-count"; - O << "\n"; + if (TripCount) { + if (TripCount->isLiveIn()) + O << "Live-in "; + TripCount->printAsOperand(O, SlotTracker); + O << " = original trip-count"; + O << "\n"; + } } LLVM_DUMP_METHOD diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index b277ed4816b8e..f68a2283c0c79 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3503,21 +3503,6 @@ class VPlan { VPBB->setPlan(this); } - /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping - /// original scalar pre-header) which contains SCEV expansions that need - /// to happen before the CFG is modified (when executing a VPlan for the - /// epilogue vector loop, the original entry 
needs to be replaced by a new - /// one); a VPBasicBlock for the vector pre-header, followed by a region for - /// the vector loop, followed by the middle VPBasicBlock. If a check is needed - /// to guard executing the scalar epilogue loop, it will be added to the - /// middle block, together with VPBasicBlocks for the scalar preheader and - /// exit blocks. \p InductionTy is the type of the canonical induction and - /// used for related values, like the trip count expression. - static VPlanPtr createInitialVPlan(Type *InductionTy, - PredicatedScalarEvolution &PSE, - bool RequiresScalarEpilogueCheck, - bool TailFolded, Loop *TheLoop); - /// Prepare the plan for execution, setting up the required live-in values. void prepareToExecute(Value *TripCount, Value *VectorTripCount, VPTransformState &State); @@ -3579,11 +3564,18 @@ class VPlan { return TripCount; } + /// Set the trip count assuming it is currently null; if it is not - use + /// resetTripCount(). + void setTripCount(VPValue *NewTripCount) { + assert(!TripCount && NewTripCount && "TripCount should not be set yet."); + TripCount = NewTripCount; + } + /// Resets the trip count for the VPlan. The caller must make sure all uses of /// the original trip count have been replaced. void resetTripCount(VPValue *NewTripCount) { assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 && - "TripCount always must be set"); + "TripCount must be set when resetting"); TripCount = NewTripCount; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp new file mode 100644 index 0000000000000..f58f0290b5fa9 --- /dev/null +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -0,0 +1,101 @@ +//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file implements transforms for initial VPlan construction. +/// +//===----------------------------------------------------------------------===// + +#include "LoopVectorizationPlanner.h" +#include "VPlan.h" +#include "VPlanCFG.h" +#include "VPlanTransforms.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" + +using namespace llvm; + +void VPlanTransforms::introduceTopLevelVectorLoopRegion( + VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, + bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) { + // TODO: Generalize to introduce all loop regions. + auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor()); + VPBlockUtils::disconnectBlocks(Plan.getEntry(), HeaderVPBB); + + VPBasicBlock *OriginalLatch = + cast<VPBasicBlock>(HeaderVPBB->getSinglePredecessor()); + VPBlockUtils::disconnectBlocks(OriginalLatch, HeaderVPBB); + VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph"); + VPBlockUtils::connectBlocks(Plan.getEntry(), VecPreheader); + assert(OriginalLatch->getNumSuccessors() == 0 && + "Plan should end at top level latch"); + + // Create SCEV and VPValue for the trip count. + // We use the symbolic max backedge-taken-count, which works also when + // vectorizing loops with uncountable early exits. + const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); + assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && + "Invalid loop count"); + ScalarEvolution &SE = *PSE.getSE(); + const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, + InductionTy, TheLoop); + Plan.setTripCount( + vputils::getOrCreateVPValueForSCEVExpr(Plan, TripCount, SE)); + + // Create VPRegionBlock, with existing header and new empty latch block, to be + // filled. 
+ VPBasicBlock *LatchVPBB = Plan.createVPBasicBlock("vector.latch"); + VPBlockUtils::insertBlockAfter(LatchVPBB, OriginalLatch); + auto *TopRegion = Plan.createVPRegionBlock( + HeaderVPBB, LatchVPBB, "vector loop", false /*isReplicator*/); + // All VPBB's reachable shallowly from HeaderVPBB belong to top level loop, + // because VPlan is expected to end at top level latch. + for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPBB)) + VPBB->setParent(TopRegion); + + VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader); + VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block"); + VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion); + + VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph"); + VPBlockUtils::connectBlocks(ScalarPH, Plan.getScalarHeader()); + if (!RequiresScalarEpilogueCheck) { + VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); + return; + } + + // If needed, add a check in the middle block to see if we have completed + // all of the iterations in the first vector loop. Three cases: + // 1) If (N - N%VF) == N, then we *don't* need to run the remainder. + // Thus if tail is to be folded, we know we don't need to run the + // remainder and we can set the condition to true. + // 2) If we require a scalar epilogue, there is no conditional branch as + // we unconditionally branch to the scalar preheader. Do nothing. + // 3) Otherwise, construct a runtime check. + BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock(); + auto *VPExitBlock = Plan.getExitBlock(IRExitBlock); + // The connection order corresponds to the operands of the conditional branch. 
+ VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); + VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH); + + auto *ScalarLatchTerm = TheLoop->getLoopLatch()->getTerminator(); + // Here we use the same DebugLoc as the scalar loop latch terminator instead + // of the corresponding compare because they may have ended up with + // different line numbers and we want to avoid awkward line stepping while + // debugging. Eg. if the compare has got a line number inside the loop. + VPBuilder Builder(MiddleVPBB); + VPValue *Cmp = + TailFolded + ? Plan.getOrAddLiveIn(ConstantInt::getTrue( + IntegerType::getInt1Ty(TripCount->getType()->getContext()))) + : Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(), + &Plan.getVectorTripCount(), + ScalarLatchTerm->getDebugLoc(), "cmp.n"); + Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, + ScalarLatchTerm->getDebugLoc()); +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp index a5e8e852bace8..4b8a2420b3037 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp @@ -23,6 +23,7 @@ #include "VPlanHCFGBuilder.h" #include "LoopVectorizationPlanner.h" +#include "VPlanCFG.h" #include "llvm/Analysis/LoopIterator.h" #define DEBUG_TYPE "loop-vectorize" @@ -178,9 +179,10 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { VPBasicBlock *VPBB = Plan.createVPBasicBlock(Name); BB2VPBB[BB] = VPBB; - // Get or create a region for the loop containing BB. + // Get or create a region for the loop containing BB, except for the top + // region of TheLoop which is created later. 
Loop *LoopOfBB = LI->getLoopFor(BB); - if (!LoopOfBB || !doesContainLoop(LoopOfBB, TheLoop)) + if (!LoopOfBB || LoopOfBB == TheLoop || !doesContainLoop(LoopOfBB, TheLoop)) return VPBB; auto *RegionOfVPBB = Loop2Region.lookup(LoopOfBB); @@ -194,12 +196,8 @@ VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) { assert(!RegionOfVPBB && "First visit of a header basic block expects to register its region."); // Handle a header - take care of its Region. - if (LoopOfBB == TheLoop) { - RegionOfVPBB = Plan.getVectorLoopRegion(); - } else { - RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/); - RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]); - } + RegionOfVPBB = Plan.createVPRegionBlock(Name.str(), false /*isReplicator*/); + RegionOfVPBB->setParent(Loop2Region[LoopOfBB->getParentLoop()]); RegionOfVPBB->setEntry(VPBB); Loop2Region[LoopOfBB] = RegionOfVPBB; return VPBB; @@ -353,29 +351,6 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, // Main interface to build the plain CFG. void PlainCFGBuilder::buildPlainCFG( DenseMap &VPB2IRBB) { - // 0. Reuse the top-level region, vector-preheader and exit VPBBs from the - // skeleton. These were created directly rather than via getOrCreateVPBB(), - // revisit them now to update BB2VPBB. Note that header/entry and - // latch/exiting VPBB's of top-level region have yet to be created. - VPRegionBlock *TheRegion = Plan.getVectorLoopRegion(); - BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); - assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && - "Unexpected loop preheader"); - auto *VectorPreheaderVPBB = - cast(TheRegion->getSinglePredecessor()); - // ThePreheaderBB conceptually corresponds to both Plan.getPreheader() (which - // wraps the original preheader BB) and Plan.getEntry() (which represents the - // new vector preheader); here we're interested in setting BB2VPBB to the - // latter. 
- BB2VPBB[ThePreheaderBB] = VectorPreheaderVPBB; - Loop2Region[LI->getLoopFor(TheLoop->getHeader())] = TheRegion; - - // The existing vector region's entry and exiting VPBBs correspond to the loop - // header and latch. - VPBasicBlock *VectorHeaderVPBB = TheRegion->getEntryBasicBlock(); - VPBasicBlock *VectorLatchVPBB = TheRegion->getExitingBasicBlock(); - BB2VPBB[TheLoop->getHeader()] = VectorHeaderVPBB; - VectorHeaderVPBB->clearSuccessors(); // 1. Scan the body of the loop in a topological order to visit each basic // block after having visited its predecessor basic blocks. Create a VPBB for @@ -386,6 +361,9 @@ void PlainCFGBuilder::buildPlainCFG( // Loop PH needs to be explicitly visited since it's not taken into account by // LoopBlocksDFS. + BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader(); + assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) && + "Unexpected loop preheader"); for (auto &I : *ThePreheaderBB) { if (I.getType()->isVoidTy()) continue; @@ -403,21 +381,18 @@ void PlainCFGBuilder::buildPlainCFG( // Set VPBB predecessors in the same order as they are in the incoming BB. if (!isHeaderBB(BB, LoopForBB)) { setVPBBPredsFromBB(VPBB, BB); - } else { - // BB is a loop header, set the predecessor for the region, except for the - // top region, whose predecessor was set when creating VPlan's skeleton. - assert(isHeaderVPBB(VPBB) && "isHeaderBB and isHeaderVPBB disagree"); - if (TheRegion != Region) - setRegionPredsFromBB(Region, BB); + } else if (Region) { + // BB is a loop header and there's a corresponding region, set the + // predecessor for it. + setRegionPredsFromBB(Region, BB); } // Create VPInstructions for BB. 
createVPInstructionsForVPBB(VPBB, BB); - if (TheLoop->getLoopLatch() == BB) { - VPBB->setOneSuccessor(VectorLatchVPBB); - VectorLatchVPBB->clearPredecessors(); - VectorLatchVPBB->setPredecessors({VPBB}); + if (BB == TheLoop->getLoopLatch()) { + VPBasicBlock *HeaderVPBB = getOrCreateVPBB(LoopForBB->getHeader()); + VPBlockUtils::connectBlocks(VPBB, HeaderVPBB); continue; } @@ -451,9 +426,8 @@ void PlainCFGBuilder::buildPlainCFG( if (BB == LoopForBB->getLoopLatch()) { // For a latch we need to set the successor of the region rather than that // of VPBB and it should be set to the exit, i.e., non-header successor, - // except for the top region, whose successor was set when creating - // VPlan's skeleton. - assert(TheRegion != Region && + // except for the top region, which is handled elsewhere. + assert(LoopForBB != TheLoop && "Latch of the top region should have been handled earlier"); Region->setOneSuccessor(isHeaderVPBB(Successor0) ? Successor1 : Successor0); @@ -482,6 +456,9 @@ void PlainCFGBuilder::buildPlainCFG( // corresponding VPlan operands. fixHeaderPhis(); + VPBlockUtils::connectBlocks(Plan.getEntry(), + getOrCreateVPBB(TheLoop->getHeader())); + for (const auto &[IRBB, VPB] : BB2VPBB) VPB2IRBB[VPB] = IRBB; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index a48bf31cd0af3..8a9a81e00fe23 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -52,6 +52,21 @@ struct VPlanTransforms { verifyVPlanIsValid(Plan); } + /// Introduce the top-level VPRegionBlock for the main loop in \p Plan. Coming + /// into this function, \p Plan's top-level loop is modeled using a plain CFG. + /// This transform wraps the plain CFG of the top-level loop within a + /// VPRegionBlock and creates a VPValue expression for the original trip + /// count. 
It will also introduce a dedicated VPBasicBlock for the vector + /// pre-header as well as a VPBasicBlock as exit block of the region + /// (middle.block). If a check is needed to guard executing the scalar + /// epilogue loop, it will be added to the middle block, together with + /// VPBasicBlocks for the scalar preheader and exit blocks. \p InductionTy is + /// the type of the canonical induction and used for related values, like the + /// trip count expression. + static void introduceTopLevelVectorLoopRegion( + VPlan &Plan, Type *InductionTy, PredicatedScalarEvolution &PSE, + bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop); + /// Replaces the VPInstructions in \p Plan with corresponding /// widen recipes. static void diff --git a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll index 4e5fc3126435d..7e90724744fb3 100644 --- a/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll +++ b/llvm/test/Transforms/LoopVectorize/outer_loop_hcfg_construction.ll @@ -32,10 +32,10 @@ define void @non_outermost_loop_hcfg_construction(i64 %n, ptr %a) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> 
zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[MIDDLE_LOOP_LATCH4:%.*]] ] @@ -173,10 +173,10 @@ define void @non_outermost_loop_hcfg_construction_other_loops_at_same_level(i64 ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[ADD]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[MIDDLE_LOOP_J0_CLEANUP4:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll index 52b2bcd9aac11..625a32c098f94 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-outer-loop.ll @@ -8,62 +8,33 @@ define void @foo(i64 %n) { ; CHECK: VPlan 'HCFGBuilder: Plain CFG ; CHECK-NEXT: { -; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count -; CHECK-NEXT: Live-in ir<8> = original trip-count ; CHECK-EMPTY: ; CHECK-NEXT: ir-bb: -; CHECK-NEXT: Successor(s): vector.ph +; CHECK-NEXT: Successor(s): vector.body ; CHECK-EMPTY: -; CHECK-NEXT: vector.ph: 
-; CHECK-NEXT: Successor(s): vector loop +; CHECK-NEXT: vector.body: +; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next> +; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> +; CHECK-NEXT: EMIT store ir<%outer.iv>, ir<%gep.1> +; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n> +; CHECK-NEXT: Successor(s): inner ; CHECK-EMPTY: -; CHECK-NEXT: vector loop: { -; CHECK-NEXT: vector.body: -; CHECK-NEXT: WIDEN-PHI ir<%outer.iv> = phi ir<0>, ir<%outer.iv.next> -; CHECK-NEXT: EMIT ir<%gep.1> = getelementptr ir<@arr2>, ir<0>, ir<%outer.iv> -; CHECK-NEXT: EMIT store ir<%outer.iv>, ir<%gep.1> -; CHECK-NEXT: EMIT ir<%add> = add ir<%outer.iv>, ir<%n> -; CHECK-NEXT: Successor(s): inner -; CHECK-EMPTY: -; CHECK-NEXT: inner: { -; CHECK-NEXT: inner: -; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next> -; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> -; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2> -; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1> -; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8> -; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec> -; CHECK-NEXT: No successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): outer.latch -; CHECK-EMPTY: -; CHECK-NEXT: outer.latch: -; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1> -; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8> -; CHECK-NEXT: Successor(s): vector.latch -; CHECK-EMPTY: -; CHECK-NEXT: vector.latch: +; CHECK-NEXT: inner: { +; CHECK-NEXT: inner: +; CHECK-NEXT: WIDEN-PHI ir<%inner.iv> = phi ir<0>, ir<%inner.iv.next> +; CHECK-NEXT: EMIT ir<%gep.2> = getelementptr ir<@arr>, ir<0>, ir<%inner.iv>, ir<%outer.iv> +; CHECK-NEXT: EMIT store ir<%add>, ir<%gep.2> +; CHECK-NEXT: EMIT ir<%inner.iv.next> = add ir<%inner.iv>, ir<1> +; CHECK-NEXT: EMIT ir<%inner.ec> = icmp ir<%inner.iv.next>, ir<8> +; CHECK-NEXT: EMIT branch-on-cond ir<%inner.ec> ; CHECK-NEXT: No 
successors -; CHECK-NEXT: } -; CHECK-NEXT: Successor(s): middle.block -; CHECK-EMPTY: -; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[C:%.+]]> = icmp eq ir<8>, vp<[[VTC]]> -; CHECK-NEXT: EMIT branch-on-cond vp<[[C]]> -; CHECK-NEXT: Successor(s): ir-bb, scalar.ph -; CHECK-EMPTY: -; CHECK-NEXT: scalar.ph: -; CHECK-NEXT: Successor(s): ir-bb -; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: IR %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] -; CHECK-NEXT: IR %gep.1 = getelementptr inbounds [8 x i64], ptr @arr2, i64 0, i64 %outer.iv -; CHECK-NEXT: IR store i64 %outer.iv, ptr %gep.1, align 4 -; CHECK-NEXT: IR %add = add nsw i64 %outer.iv, %n -; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): outer.latch ; CHECK-EMPTY: -; CHECK-NEXT: ir-bb: -; CHECK-NEXT: No successors +; CHECK-NEXT: outer.latch: +; CHECK-NEXT: EMIT ir<%outer.iv.next> = add ir<%outer.iv>, ir<1> +; CHECK-NEXT: EMIT ir<%outer.ec> = icmp ir<%outer.iv.next>, ir<8> +; CHECK-NEXT: Successor(s): vector.body ; CHECK-NEXT: } entry: br label %outer.header diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h index 8d03e91fb26c3..caf5d2357411d 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h +++ b/llvm/unittests/Transforms/Vectorize/VPlanTestBase.h @@ -14,6 +14,7 @@ #include "../lib/Transforms/Vectorize/VPlan.h" #include "../lib/Transforms/Vectorize/VPlanHCFGBuilder.h" +#include "../lib/Transforms/Vectorize/VPlanTransforms.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/LoopInfo.h" @@ -70,10 +71,11 @@ class VPlanTestIRBase : public testing::Test { Loop *L = LI->getLoopFor(LoopHeader); PredicatedScalarEvolution PSE(*SE, *L); - auto Plan = VPlan::createInitialVPlan(IntegerType::get(*Ctx, 64), PSE, true, - false, L); + auto Plan = std::make_unique<VPlan>(L); VPlanHCFGBuilder HCFGBuilder(L, LI.get(), *Plan); 
HCFGBuilder.buildHierarchicalCFG(); + VPlanTransforms::introduceTopLevelVectorLoopRegion( + *Plan, IntegerType::get(*Ctx, 64), PSE, true, false, L); return Plan; } };