From 1ea4a7c01a8439be774d05ed33867be545196b23 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 7 Jun 2024 11:18:55 +0100 Subject: [PATCH 1/5] [VPlan] Introduce ExitPhi VPInstruction, use to create phi for FOR. This patch introduces a new ExitPhi VPInstruction which creates a phi in a leaf block of a VPlan. The first use is to create the phi node for fixed-order recurrence resume values in the scalar preheader. The VPInstruction takes 2 operands: 1) the incoming value from the middle-block and a default value to be used for all other incoming blocks. In follow-up changes, it will also be used to create phis for reduction and induction resume values. --- .../Transforms/Vectorize/LoopVectorize.cpp | 80 ++++++----- llvm/lib/Transforms/Vectorize/VPlan.cpp | 20 ++- llvm/lib/Transforms/Vectorize/VPlan.h | 19 ++- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 33 ++++- .../Transforms/Vectorize/VPlanTransforms.cpp | 8 -- .../first-order-recurrence-fold-tail.ll | 4 +- .../AArch64/fixed-order-recurrence.ll | 8 +- .../LoopVectorize/AArch64/induction-costs.ll | 12 +- .../AArch64/loop-vectorization-factors.ll | 8 +- .../AArch64/reduction-recurrence-costs-sve.ll | 8 +- .../AArch64/sve-interleaved-accesses.ll | 6 +- .../first-order-recurrence-scalable-vf1.ll | 4 +- .../predicated-first-order-recurrence.ll | 4 +- .../X86/fixed-order-recurrence.ll | 12 +- .../LoopVectorize/X86/induction-costs.ll | 10 +- .../Transforms/LoopVectorize/X86/pr72969.ll | 4 +- .../first-order-recurrence-chains-vplan.ll | 15 +- .../first-order-recurrence-chains.ll | 57 ++++---- .../first-order-recurrence-complex.ll | 36 ++--- ...t-order-recurrence-multiply-recurrences.ll | 8 +- ...-order-recurrence-sink-replicate-region.ll | 18 ++- .../LoopVectorize/first-order-recurrence.ll | 134 +++++++++--------- .../Transforms/LoopVectorize/induction.ll | 42 +++--- .../interleave-and-scalarize-only.ll | 3 +- .../LoopVectorize/interleaved-accesses.ll | 6 +- llvm/test/Transforms/LoopVectorize/pr36983.ll | 4 +- .../scalable-first-order-recurrence.ll | 16 +-- .../LoopVectorize/vplan-printing.ll | 5 +- 28 files changed, 326 insertions(+), 258 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 56fb8a10d7334..20e2627523eb2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -607,10 +607,6 @@ class InnerLoopVectorizer { BasicBlock *MiddleBlock, BasicBlock *VectorHeader, VPlan &Plan, VPTransformState &State); - /// Create the phi node for the resume value of first order recurrences in the - /// scalar preheader and update the users in the scalar loop. - void fixFixedOrderRecurrence(VPLiveOut *LO, VPTransformState &State); - /// Iteratively sink the scalarized operands of a predicated instruction into /// the block that was created for it. void sinkScalarOperands(Instruction *PredInst); @@ -3315,8 +3311,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) { if (!Legal->isFixedOrderRecurrence(LO->getPhi())) continue; - fixFixedOrderRecurrence(LO, State); - Plan.removeLiveOut(LO->getPhi()); } // Forget the original basic block. @@ -3386,33 +3380,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, VF.getKnownMinValue() * UF); } -void InnerLoopVectorizer::fixFixedOrderRecurrence(VPLiveOut *LO, - VPTransformState &State) { - // Extract the last vector element in the middle block. This will be the - // initial value for the recurrence when jumping to the scalar loop. - VPValue *VPExtract = LO->getOperand(0); - using namespace llvm::VPlanPatternMatch; - assert(match(VPExtract, m_VPInstruction( - m_VPValue(), m_VPValue())) && - "FOR LiveOut expects to use an extract from end."); - Value *ResumeScalarFOR = State.get(VPExtract, UF - 1, true); - - // Fix the initial value of the original recurrence in the scalar loop. - PHINode *ScalarHeaderPhi = LO->getPhi(); - auto *InitScalarFOR = - ScalarHeaderPhi->getIncomingValueForBlock(LoopScalarPreHeader); - Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin()); - auto *ScalarPreheaderPhi = - Builder.CreatePHI(ScalarHeaderPhi->getType(), 2, "scalar.recur.init"); - for (auto *BB : predecessors(LoopScalarPreHeader)) { - auto *Incoming = BB == LoopMiddleBlock ? ResumeScalarFOR : InitScalarFOR; - ScalarPreheaderPhi->addIncoming(Incoming, BB); - } - ScalarHeaderPhi->setIncomingValueForBlock(LoopScalarPreHeader, - ScalarPreheaderPhi); - ScalarHeaderPhi->setName("scalar.recur"); -} - void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { // The basic block and loop containing the predicated instruction. auto *PredBB = PredInst->getParent(); @@ -8463,7 +8430,9 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop, Value *IncomingValue = ExitPhi.getIncomingValueForBlock(ExitingBB); VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue, Plan); - Plan.addLiveOut(&ExitPhi, V); + Plan.addLiveOut( + &ExitPhi, V, + cast(Plan.getVectorLoopRegion()->getSingleSuccessor())); } } @@ -8635,6 +8604,49 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); + auto *MiddleVPBB = + cast(Plan->getVectorLoopRegion()->getSingleSuccessor()); + + VPBasicBlock *ScalarPH = nullptr; + for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) { + auto *VPBB = dyn_cast(Succ); + if (VPBB && !isa(VPBB)) { + ScalarPH = VPBB; + break; + } + } + + if (ScalarPH) { + for (auto &H : HeaderVPBB->phis()) { + auto *FOR = dyn_cast(&H); + if (!FOR) + continue; + VPBuilder B(ScalarPH); + VPBuilder MiddleBuilder; + // Set insert point so new recipes are inserted before terminator and + // condition, if there is either the former or both. + if (MiddleVPBB->getNumSuccessors() != 2) + MiddleBuilder.setInsertPoint(MiddleVPBB); + else if (isa(MiddleVPBB->getTerminator()->getOperand(0))) + MiddleBuilder.setInsertPoint( + &*std::prev(MiddleVPBB->getTerminator()->getIterator())); + else + MiddleBuilder.setInsertPoint(MiddleVPBB->getTerminator()); + + // Extract the resume value and create a new VPLiveOut for it. + auto *Resume = MiddleBuilder.createNaryOp( + VPInstruction::ExtractFromEnd, + {FOR->getBackedgeValue(), + Plan->getOrAddLiveIn( + ConstantInt::get(Plan->getCanonicalIV()->getScalarType(), 1))}, + {}, "vector.recur.extract"); + auto *R = + B.createNaryOp(VPInstruction::ExitPhi, {Resume, FOR->getStartValue()}, + {}, "scalar.recur.init"); + Plan->addLiveOut(cast(FOR->getUnderlyingInstr()), R, ScalarPH); + } + } + // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to // bring the VPlan to its final state. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 20b72b1609ca4..be5ce5cae41c7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -909,6 +909,11 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, /// VPBB, if any, are rewired to the new VPIRBasicBlock. static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) { VPIRBasicBlock *IRMiddleVPBB = new VPIRBasicBlock(IRBB); + for (auto &[_, LO] : VPBB->getPlan()->getLiveOuts()) { + if (LO->getPred() == VPBB) + LO->setPred(IRMiddleVPBB); + } + for (auto &R : make_early_inc_range(*VPBB)) R.moveBefore(*IRMiddleVPBB, IRMiddleVPBB->end()); VPBlockBase *PredVPBB = VPBB->getSinglePredecessor(); @@ -1124,9 +1129,9 @@ LLVM_DUMP_METHOD void VPlan::dump() const { print(dbgs()); } #endif -void VPlan::addLiveOut(PHINode *PN, VPValue *V) { +void VPlan::addLiveOut(PHINode *PN, VPValue *V, VPBasicBlock *Pred) { assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists"); - LiveOuts.insert({PN, new VPLiveOut(PN, V)}); + LiveOuts.insert({PN, new VPLiveOut(PN, V, Pred)}); } static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, @@ -1195,9 +1200,18 @@ VPlan *VPlan::duplicate() { remapOperands(Preheader, NewPreheader, Old2NewVPValues); remapOperands(Entry, NewEntry, Old2NewVPValues); + DenseMap Old2NewVPBlocks; + VPBlockBase *OldMiddle = getVectorLoopRegion()->getSingleSuccessor(); + VPBlockBase *NewMiddle = NewPlan->getVectorLoopRegion()->getSingleSuccessor(); + Old2NewVPBlocks[OldMiddle] = NewMiddle; + for (const auto &[Old, New] : + zip(OldMiddle->getSuccessors(), NewMiddle->getSuccessors())) + Old2NewVPBlocks[Old] = New; + // Clone live-outs. for (const auto &[_, LO] : LiveOuts) - NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]); + NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)], + cast(Old2NewVPBlocks[LO->getPred()])); // Initialize remaining fields of cloned VPlan. NewPlan->VFs = VFs; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index f397c7a6e1036..8d3c681cd903d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -686,9 +686,13 @@ class VPBlockBase { class VPLiveOut : public VPUser { PHINode *Phi; + /// Predecessor in VPlan of this live-out. Used to as block to set the + /// incoming value for. + VPBasicBlock *Pred; + public: - VPLiveOut(PHINode *Phi, VPValue *Op) - : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {} + VPLiveOut(PHINode *Phi, VPValue *Op, VPBasicBlock *Pred) + : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi), Pred(Pred) {} static inline bool classof(const VPUser *U) { return U->getVPUserID() == VPUser::VPUserID::LiveOut; @@ -710,6 +714,11 @@ class VPLiveOut : public VPUser { PHINode *getPhi() const { return Phi; } + /// Returns to incoming block for which to set the value. + VPBasicBlock *getPred() const { return Pred; } + + void setPred(VPBasicBlock *Pred) { this->Pred = Pred; } + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the VPLiveOut to \p O. void print(raw_ostream &O, VPSlotTracker &SlotTracker) const; @@ -1191,6 +1200,10 @@ class VPInstruction : public VPRecipeWithIRFlags { SLPStore, ActiveLaneMask, ExplicitVectorLength, + /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan. + /// The first operand is the incoming value from the predecessor in VPlan, + /// the second operand is the incoming value for all other predecessors. + ExitPhi, CalculateTripCountMinusVF, // Increment the canonical IV separately for each unrolled part. CanonicalIVIncrementForPart, @@ -3333,7 +3346,7 @@ class VPlan { return cast(&*EntryVPBB->begin()); } - void addLiveOut(PHINode *PN, VPValue *V); + void addLiveOut(PHINode *PN, VPValue *V, VPBasicBlock *Pred); void removeLiveOut(PHINode *PN) { delete LiveOuts[PN]; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index b051f9d5e4a69..2ffad74cbbe62 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -193,11 +193,12 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { VPValue *ExitValue = getOperand(0); if (vputils::isUniformAfterVectorization(ExitValue)) Lane = VPLane::getFirstLane(); - VPBasicBlock *MiddleVPBB = - cast(Plan.getVectorLoopRegion()->getSingleSuccessor()); - BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB]; - Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)), - MiddleBB); + BasicBlock *PredBB = State.CFG.VPBB2IRBB[Pred]; + Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane)); + if (Phi->getBasicBlockIndex(PredBB) != -1) + Phi->setIncomingValueForBlock(PredBB, V); + else + Phi->addIncoming(V, PredBB); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -303,6 +304,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::PtrAdd: case VPInstruction::ExplicitVectorLength: + case VPInstruction::ExitPhi: return true; default: return false; @@ -593,6 +595,23 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true); return Builder.CreatePtrAdd(Ptr, Addend, Name); } + case VPInstruction::ExitPhi: { + if (Part != 0) + return State.get(this, 0, /*IsScalar*/ true); + Value *IncomingFromVPlanPred = + State.get(getOperand(0), Part, /* IsScalar */ true); + Value *IncomingFromOtherPred = + State.get(getOperand(1), Part, /* IsScalar */ true); + auto *NewPhi = Builder.CreatePHI(IncomingFromOtherPred->getType(), 2, Name); + BasicBlock *VPlanPred = + State.CFG + .VPBB2IRBB[cast(getParent()->getSinglePredecessor())]; + NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred); + for (auto *BB : predecessors(Builder.GetInsertBlock())) + NewPhi->addIncoming(IncomingFromOtherPred, BB); + return NewPhi; + } + default: llvm_unreachable("Unsupported opcode for instruction"); } @@ -600,6 +619,7 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { bool VPInstruction::isVectorToScalar() const { return getOpcode() == VPInstruction::ExtractFromEnd || + getOpcode() == VPInstruction::ExitPhi || getOpcode() == VPInstruction::ComputeReductionResult; } @@ -729,6 +749,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ActiveLaneMask: O << "active lane mask"; break; + case VPInstruction::ExitPhi: + O << "exit-phi"; + break; case VPInstruction::ExplicitVectorLength: O << "EXPLICIT-VECTOR-LENGTH"; break; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index fc213320d5a35..5a773e8f35563 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -945,14 +945,6 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, {}, "vector.recur.extract.for.phi")); RecurSplice->replaceUsesWithIf( Penultimate, [](VPUser &U, unsigned) { return isa(&U); }); - - // Extract the resume value and create a new VPLiveOut for it. - auto *Resume = MiddleBuilder.createNaryOp( - VPInstruction::ExtractFromEnd, - {FOR->getBackedgeValue(), - Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 1))}, - {}, "vector.recur.extract"); - Plan.addLiveOut(cast(FOR->getUnderlyingInstr()), Resume); } return true; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll index fda2cbef0d585..e9c9288e73439 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll @@ -73,8 +73,8 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP24]], i32 3 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1004, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll index f728a3f74779c..6c21dc68e92ce 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll @@ -51,8 +51,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -160,10 +160,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT7:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 4123324a056f1..3fd957ccb84f7 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -124,13 +124,13 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -187,13 +187,13 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index 1532c76993224..61bd8c51e1605 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -789,8 +789,8 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -868,13 +868,13 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: [[A_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR:%.*]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 3c3189a5a8b83..2b2c224dbefe0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -107,9 +107,9 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: -; DEFAULT-NEXT: [[SCALAR_RECUR_INIT14:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT13]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; DEFAULT-NEXT: [[SCALAR_RECUR_INIT14:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT13]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; DEFAULT-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP60]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; DEFAULT-NEXT: br label [[LOOP:%.*]] ; DEFAULT: loop: @@ -227,9 +227,9 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[VECTOR_RECUR_EXTRACT8:%.*]] = extractelement [[TMP22]], i32 [[TMP51]] ; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; PRED: scalar.ph: -; PRED-NEXT: [[SCALAR_RECUR_INIT9:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; PRED-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; PRED-NEXT: [[SCALAR_RECUR_INIT9:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; PRED-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP45]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; PRED-NEXT: br label [[LOOP:%.*]] ; PRED: loop: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 1c132fffa538d..d6794420c403f 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -1516,9 +1516,9 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll index 0db335e43db5b..9dd594791eb64 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll @@ -28,12 +28,12 @@ define i64 @pr97452_scalable_vf1_for(ptr %src) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[WIDE_LOAD1]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[WIDE_LOAD1]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[WIDE_LOAD1]], i32 2 ; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll index 47f9c7abdc912..9a1e613a736bf 100644 --- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll @@ -66,8 +66,8 @@ define void @func_21() { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[TMP12]], i32 1 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 6, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 6, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LV:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 2b51e4330e9ad..621888c144852 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -51,8 +51,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -160,10 +160,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE45]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT8:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE44]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[VECTOR_RECUR_EXTRACT7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -244,8 +244,8 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i64> [[TMP1]], i32 15 ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[REC_START]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 65, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 65, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[REC_START]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 968dc09bec4e6..afd09716759d9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -319,11 +319,11 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP22]], i32 15 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[VECTOR_MEMCHECK]] ], [ 2048, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[ENTRY]] ], [ 100, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[ENTRY:%.*]] ], [ 100, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ], [ [[A]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2048, [[VECTOR_MEMCHECK]] ], [ 2048, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[LOOP]] ] @@ -395,12 +395,12 @@ define i16 @iv_and_step_trunc() { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index 23d816297ce59..b88f413ff1b81 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -86,9 +86,9 @@ define void @test(ptr %p) { ; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; VEC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; VEC: scalar.ph: -; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY:%.*]] ] -; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ], [ 1, [[VECTOR_SCEVCHECK]] ] +; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ], [ 1, [[VECTOR_SCEVCHECK]] ] ; VEC-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY]] ] ; VEC-NEXT: br label [[FOR_BODY:%.*]] ; VEC: for.body: ; VEC-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IDX:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index 443f473803668..55c49517ef8fa 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -43,10 +43,12 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<22> +; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]> = exit-phi vp<[[RESUME_2]]>, ir<33> ; CHECK-NEXT: No successors ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1]]> -; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2]]> +; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1_P]]> +; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2_P]]> ; CHECK-NEXT: } ; entry: @@ -113,11 +115,14 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<22> +; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]> = exit-phi vp<[[RESUME_2]]>, ir<33> +; CHECK-NEXT: EMIT vp<[[RESUME_3_P:%.*]]> = exit-phi vp<[[RESUME_3]]>, ir<33> ; CHECK-NEXT: No successors ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1]]> -; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2]]> -; CHECK-NEXT: Live-out i16 %for.3 = vp<[[RESUME_3]]> +; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1_P]]> +; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2_P]]> +; CHECK-NEXT: Live-out i16 %for.3 = vp<[[RESUME_3_P]]> ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index a861d9c5fb7f0..94f35ad453670 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -18,10 +18,10 @@ define i16 @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label %middle.block, label %vector.body ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; entry: br label %loop @@ -61,10 +61,10 @@ define i16 @test_chained_first_order_recurrences_2(ptr %ptr) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP8]], label %middle.block, label %vector.body, !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; entry: br label %loop @@ -107,12 +107,12 @@ define i16 @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; entry: br label %loop @@ -219,12 +219,12 @@ define i16 @test_chained_first_order_recurrences_3_reordered_1(ptr %ptr) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; entry: br label %loop @@ -270,12 +270,12 @@ define i16 @test_chained_first_order_recurrences_3_reordered_2(ptr %ptr) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; entry: br label %loop @@ -321,12 +321,12 @@ define i16 @test_chained_first_order_recurrences_3_for2_no_other_uses(ptr %ptr) ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; entry: br label %loop @@ -371,12 +371,12 @@ define i16 @test_chained_first_order_recurrences_3_for1_for2_no_other_uses(ptr % ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; entry: br label %loop @@ -420,10 +420,10 @@ define double @test_chained_first_order_recurrence_sink_users_1(ptr %ptr) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996 ; CHECK-NEXT: br i1 [[TMP9]], label %middle.block, label %vector.body, !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x double> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 2 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x double> [[TMP4]], i32 2 ; entry: br label %loop @@ -488,8 +488,8 @@ define i64 @test_first_order_recurrences_and_induction(ptr %ptr) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2 ; CHECK-NEXT: br i1 true entry: @@ -528,8 +528,8 @@ define i64 @test_first_order_recurrences_and_induction2(ptr %ptr) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2 ; CHECK-NEXT: br i1 true ; entry: @@ -568,8 +568,8 @@ define ptr @test_first_order_recurrences_and_pointer_induction1(ptr %ptr) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2 ; CHECK-NEXT: br i1 true ; entry: @@ -611,8 +611,8 @@ define ptr @test_first_order_recurrences_and_pointer_induction2(ptr %ptr) { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2 ; CHECK-NEXT: br i1 true ; entry: @@ -657,13 +657,14 @@ define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2 ; CHECK-NEXT: br i1 true, label %End, label %scalar.ph ; CHECK: scalar.ph: -; CHECK-NEXT: phi double [ [[VECTOR_RECUR_EXTRACT9]], %middle.block ], [ 0.000000e+00, %Entry ] -; CHECK-NEXT: phi double [ [[TMP3]], %middle.block ], [ 0.000000e+00, %Entry ] +; CHECK-NEXT: %bc.resume.val = phi i64 [ 0, %middle.block ], [ 0, %Entry ] ; CHECK-NEXT: phi double [ [[TMP0]], %middle.block ], [ 0.000000e+00, %Entry ] +; CHECK-NEXT: phi double [ [[TMP3]], %middle.block ], [ 0.000000e+00, %Entry ] +; CHECK-NEXT: phi double [ [[VECTOR_RECUR_EXTRACT9]], %middle.block ], [ 0.000000e+00, %Entry ] ; CHECK: End: ; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[TMP0]], %middle.block ] ; CHECK-NEXT: = phi double [ {{.+}}, %Loop ], [ [[TMP3]], %middle.block ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index f780902e58d4b..371c58e8eb9cc 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -41,8 +41,8 @@ define void @can_sink_after_store(i32 %x, ptr %ptr, i64 %tc) local_unnamed_addr ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ] ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ] @@ -121,8 +121,8 @@ define void @sink_sdiv(i32 %x, ptr %ptr, i64 %tc) local_unnamed_addr #0 { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ] ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ] @@ -202,8 +202,8 @@ define void @can_sink_with_additional_user(i32 %x, ptr %ptr, i64 %tc) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ] ; CHECK-NEXT: br label [[FOR:%.*]] ; CHECK: for: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ] @@ -387,9 +387,9 @@ define void @instruction_with_2_FOR_operands(ptr noalias %A, ptr noalias %B, ptr ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[BB74:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[BB:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[BB]] ] ; CHECK-NEXT: br label [[BB13:%.*]] ; CHECK: bb13: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[TMP60:%.*]], [[BB13]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ] @@ -463,9 +463,9 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses(ptr noalias ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] @@ -554,9 +554,9 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses_chain(ptr n ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] @@ -696,8 +696,8 @@ define i16 @multiple_exit(ptr %p, i32 %n) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -772,8 +772,8 @@ define i16 @multiple_exit2(ptr %p, i32 %n) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] @@ -852,8 +852,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] @@ -935,8 +935,8 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ] @@ -1057,9 +1057,9 @@ define void @test_for_sink_instruction_after_same_incoming_1(ptr %ptr) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] @@ -1125,9 +1125,9 @@ define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll index f9e7768ca20e9..779836ee9e91e 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll @@ -123,9 +123,9 @@ define void @test_pr54223_sink_after_insertion_order(ptr noalias %a, ptr noalias ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT5:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -246,9 +246,9 @@ define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias % ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 39a3f1064b2eb..3412527ad4b77 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -81,9 +81,10 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: No successors ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1]]> +; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1_P]]> ; CHECK-NEXT: } ; entry: @@ -163,9 +164,10 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: No successors ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1]]> +; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]> ; CHECK-NEXT: } ; entry: @@ -244,10 +246,11 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %res = vp<[[RED_RES]]> -; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1]]> +; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]> ; CHECK-NEXT: } ; entry: @@ -351,9 +354,10 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: No successors ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1]]> +; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1_P]]> ; CHECK-NEXT: } ; entry: @@ -444,9 +448,10 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: No successors ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1]]> +; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]> ; CHECK-NEXT: } ; entry: @@ -526,9 +531,10 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: No successors ; CHECK-EMPTY: -; CHECK-NEXT: Live-out i32 %.pn = vp<[[RESUME_1]]> +; CHECK-NEXT: Live-out i32 %.pn = vp<[[RESUME_1_P]]> ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index f48a0d80de318..10b5aa64c180a 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -59,8 +59,8 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ] @@ -115,8 +115,8 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP16:%.*]], [[SCALAR_BODY]] ] @@ -169,8 +169,8 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: ; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[SCALAR_BODY]] ] @@ -271,8 +271,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ poison, [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: for.cond.cleanup.loopexit: @@ -340,8 +340,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ poison, [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: for.cond.cleanup.loopexit: @@ -404,8 +404,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ poison, [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: for.cond.cleanup.loopexit: @@ -534,8 +534,8 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP24:%.*]], [[SCALAR_BODY]] ] @@ -607,8 +607,8 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ] @@ -678,8 +678,8 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: ; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ] @@ -916,13 +916,13 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 2 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 2 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_COND1:%.*]] ; UNROLL-NO-IC: for.cond.cleanup: ; UNROLL-NO-IC-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ] @@ -965,8 +965,8 @@ define i32 @PR27246() { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_COND1:%.*]] ; UNROLL-NO-VF: for.cond.cleanup: ; UNROLL-NO-VF-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ] @@ -1009,13 +1009,13 @@ define i32 @PR27246() { ; SINK-AFTER-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 2 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 2 ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ] ; SINK-AFTER-NEXT: br label [[FOR_COND1:%.*]] ; SINK-AFTER: for.cond.cleanup: ; SINK-AFTER-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ] @@ -1120,8 +1120,8 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP34]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] @@ -1166,8 +1166,8 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: ; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] @@ -1225,8 +1225,8 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[PRE_LOAD]], [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: ; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] @@ -1272,8 +1272,8 @@ define i64 @constant_folded_previous_value() { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: ; UNROLL-NO-IC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] @@ -1302,8 +1302,8 @@ define i64 @constant_folded_previous_value() { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: ; UNROLL-NO-VF-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] @@ -1330,8 +1330,8 @@ define i64 @constant_folded_previous_value() { ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: ; SINK-AFTER-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] @@ -1385,12 +1385,12 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] @@ -1422,8 +1422,8 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: ; UNROLL-NO-VF-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] @@ -1454,12 +1454,12 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 ; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2 ; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: ; SINK-AFTER-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] @@ -1580,9 +1580,9 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) ; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP43]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.cond.cleanup: @@ -1640,9 +1640,9 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]] ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY:%.*]] ] -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.cond.cleanup: @@ -1709,9 +1709,9 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; SINK-AFTER-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP20]]) ; SINK-AFTER-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ [[J]], [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.cond.cleanup: @@ -1811,8 +1811,8 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[FOR_BODY]] ] @@ -1867,8 +1867,8 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ] @@ -1919,8 +1919,8 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: ; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP11:%.*]], [[FOR_BODY]] ] @@ -2049,8 +2049,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP49:%.*]], [[FOR_BODY]] ] @@ -2109,8 +2109,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ] @@ -2176,8 +2176,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: ; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[FOR_BODY]] ] @@ -2277,8 +2277,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ] @@ -2336,8 +2336,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] @@ -2390,8 +2390,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: ; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] @@ -2577,9 +2577,9 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT3]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT3]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_COND:%.*]] ; UNROLL-NO-IC: for.cond: ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] @@ -2627,9 +2627,9 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT2:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 15, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 15, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT2:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_COND:%.*]] ; UNROLL-NO-VF: for.cond: ; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] @@ -2676,9 +2676,9 @@ define void @sink_dead_inst(ptr %a) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3 ; SINK-AFTER-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_COND:%.*]] ; SINK-AFTER: for.cond: ; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] @@ -2830,8 +2830,8 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP43]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29:![0-9]+]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP51]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-IC: bb1: @@ -2895,8 +2895,8 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]] ; UNROLL-NO-VF-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29:![0-9]+]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB:%.*]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-VF: bb1: @@ -2983,8 +2983,8 @@ define i32 @sink_into_replication_region(i32 %y) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3 ; SINK-AFTER-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29:![0-9]+]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; SINK-AFTER-NEXT: br label [[BB2:%.*]] ; SINK-AFTER: bb1: @@ -3198,9 +3198,9 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP43]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP75]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; UNROLL-NO-IC-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-IC: bb1: @@ -3282,9 +3282,9 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP17]], [[TMP16]] ; UNROLL-NO-VF-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; UNROLL-NO-VF-NEXT: br label [[BB2:%.*]] ; UNROLL-NO-VF: bb1: @@ -3406,9 +3406,9 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3 ; SINK-AFTER-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]], !prof [[PROF29]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[BB:%.*]] ] ; SINK-AFTER-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP39]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ] ; SINK-AFTER-NEXT: br label [[BB2:%.*]] ; SINK-AFTER: bb1: @@ -3483,12 +3483,12 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; UNROLL-NO-IC-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -3535,8 +3535,8 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-VF: loop: ; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -3578,12 +3578,12 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 ; SINK-AFTER-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2 ; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[LOOP:%.*]] ; SINK-AFTER: loop: ; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -3651,8 +3651,8 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 997, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_COND:%.*]] ; UNROLL-NO-IC: for.cond: ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] @@ -3687,8 +3687,8 @@ define void @unused_recurrence(ptr %a) { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1001, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_COND:%.*]] ; UNROLL-NO-VF: for.cond: ; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] @@ -3720,8 +3720,8 @@ define void @unused_recurrence(ptr %a) { ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 ; SINK-AFTER-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 1001, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: br label [[FOR_COND:%.*]] ; SINK-AFTER: for.cond: ; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ] @@ -3768,8 +3768,8 @@ define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] @@ -3795,8 +3795,8 @@ define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) { ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-VF: loop: ; UNROLL-NO-VF-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] @@ -3824,8 +3824,8 @@ define i32 @recurence_uniform_load(ptr %src, ptr noalias %dst) { ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP0]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; SINK-AFTER-NEXT: br label [[LOOP:%.*]] ; SINK-AFTER: loop: ; SINK-AFTER-NEXT: [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 5b7d2207fbaa3..c0bd7bb06ba25 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -5583,9 +5583,9 @@ define i64 @trunc_with_first_order_recurrence() { ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i32 1 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 42, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 42, [[ENTRY]] ] ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: exit: @@ -5803,9 +5803,9 @@ define i64 @trunc_with_first_order_recurrence() { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD5]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 42, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 42, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: exit: @@ -5973,9 +5973,9 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 1 ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -6100,9 +6100,9 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -6260,9 +6260,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6331,9 +6331,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: -; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] -; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] -; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; IND-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; IND-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; IND-NEXT: br label [[LOOP:%.*]] ; IND: loop: ; IND-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6406,9 +6406,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: loop: ; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6489,9 +6489,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[LOOP:%.*]] ; UNROLL-NO-IC: loop: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] @@ -6564,9 +6564,9 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: -; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] -; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; INTERLEAVE-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] +; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ] ; INTERLEAVE-NEXT: br label [[LOOP:%.*]] ; INTERLEAVE: loop: ; INTERLEAVE-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index a72c059265b0f..9456732fceee7 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -219,9 +219,10 @@ exit: ; DBG-NEXT: No successors ; DBG-EMPTY: ; DBG-NEXT: scalar.ph: +; DBG-NEXT: EMIT vp<[[RESUME_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> ; DBG-NEXT: No successors ; DBG-EMPTY: -; DBG-NEXT: Live-out i32 %for = vp<[[RESUME_1]]> +; DBG-NEXT: Live-out i32 %for = vp<[[RESUME_P]]> ; DBG-NEXT: } define void @first_order_recurrence_using_induction(i32 %n, ptr %dst) { diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index f6c63f282d5b2..2e8e14ed695a5 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -1515,9 +1515,9 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[ENTRY]] ], [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LOAD2:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr36983.ll b/llvm/test/Transforms/LoopVectorize/pr36983.ll index 1538bcf9f911c..f4da4f355eb0d 100644 --- a/llvm/test/Transforms/LoopVectorize/pr36983.ll +++ b/llvm/test/Transforms/LoopVectorize/pr36983.ll @@ -3,8 +3,8 @@ ; There could be more than one LCSSA PHIs in loop exit block. ; CHECK-LABEL: bb1.bb3_crit_edge: -; CHECK: %_tmp133.lcssa1 = phi i16 [ %scalar.recur, %bb2 ], [ %vector.recur.extract.for.phi, %middle.block ] -; CHECK: %_tmp133.lcssa = phi i16 [ %scalar.recur, %bb2 ], [ %vector.recur.extract.for.phi, %middle.block ] +; CHECK: %_tmp133.lcssa1 = phi i16 [ %_tmp133, %bb2 ], [ %vector.recur.extract.for.phi, %middle.block ] +; CHECK: %_tmp133.lcssa = phi i16 [ %_tmp133, %bb2 ], [ %vector.recur.extract.for.phi, %middle.block ] define void @f1() { bb2.lr.ph: diff --git a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll index b3721056c4d7f..f6923bea4f900 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-first-order-recurrence.ll @@ -23,14 +23,14 @@ define i32 @recurrence_1(ptr nocapture readonly %a, ptr nocapture %b, i32 %n) { ; CHECK-VF4UF1: %[[LOAD]] = load , ptr ; CHECK-VF4UF1: %[[SPLICE:.*]] = call @llvm.vector.splice.nxv4i32( %[[VEC_RECUR]], %[[LOAD]], i32 -1) ; CHECK-VF4UF1: middle.block: -; CHECK-VF4UF1: %[[VSCALE2:.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF1: %[[MUL2:.*]] = mul i32 %[[VSCALE2]], 4 -; CHECK-VF4UF1: %[[SUB3:.*]] = sub i32 %[[MUL2]], 2 -; CHECK-VF4UF1: %[[VEC_RECUR_FOR_PHI:.*]] = extractelement %[[LOAD]], i32 %[[SUB3]] ; CHECK-VF4UF1: %[[VSCALE3:.*]] = call i32 @llvm.vscale.i32() ; CHECK-VF4UF1: %[[MUL3:.*]] = mul i32 %[[VSCALE3]], 4 ; CHECK-VF4UF1: %[[SUB3:.*]] = sub i32 %[[MUL3]], 1 ; CHECK-VF4UF1: %[[VEC_RECUR_EXT:.*]] = extractelement %[[LOAD]], i32 %[[SUB3]] +; CHECK-VF4UF1: %[[VSCALE2:.*]] = call i32 @llvm.vscale.i32() +; CHECK-VF4UF1: %[[MUL2:.*]] = mul i32 %[[VSCALE2]], 4 +; CHECK-VF4UF1: %[[SUB3:.*]] = sub i32 %[[MUL2]], 2 +; CHECK-VF4UF1: %[[VEC_RECUR_FOR_PHI:.*]] = extractelement %[[LOAD]], i32 %[[SUB3]] entry: br label %for.preheader @@ -209,14 +209,14 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) { ; CHECK-VF4UF2: %[[ADD1:.*]] = add %{{.*}}, %[[SPLAT1]] ; CHECK-VF4UF2: %[[ADD2]] = add %{{.*}}, %[[SPLAT1]] ; CHECK-VF4UF2: middle.block -; CHECK-VF4UF2: %[[VSCALE2:.*]] = call i32 @llvm.vscale.i32() -; CHECK-VF4UF2: %[[MUL2:.*]] = mul i32 %[[VSCALE2]], 4 -; CHECK-VF4UF2: %[[SUB3:.*]] = sub i32 %[[MUL2]], 2 -; CHECK-VF4UF2: %vector.recur.extract.for.phi = extractelement %[[ADD2]], i32 %[[SUB3]] ; CHECK-VF4UF2: %[[VSCALE3:.*]] = call i32 @llvm.vscale.i32() ; CHECK-VF4UF2: %[[MUL3:.*]] = mul i32 %[[VSCALE3]], 4 ; CHECK-VF4UF2: %[[SUB2:.*]] = sub i32 %[[MUL3]], 1 ; CHECK-VF4UF2: %vector.recur.extract = extractelement %[[ADD2]], i32 %[[SUB2]] +; CHECK-VF4UF2: %[[VSCALE2:.*]] = call i32 @llvm.vscale.i32() +; CHECK-VF4UF2: %[[MUL2:.*]] = mul i32 %[[VSCALE2]], 4 +; CHECK-VF4UF2: %[[SUB3:.*]] = sub i32 %[[MUL2]], 2 +; CHECK-VF4UF2: %vector.recur.extract.for.phi = extractelement %[[ADD2]], i32 %[[SUB3]] entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 36bd4f77cd4f2..260a4dd1e095d 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -1023,8 +1023,8 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: -; CHECK-NEXT: EMIT vp<[[FOR_RESULT:%.+]]> = extract-from-end ir<%for.1.next>, ir<2> ; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> +; CHECK-NEXT: EMIT vp<[[FOR_RESULT:%.+]]> = extract-from-end ir<%for.1.next>, ir<2> ; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1000>, vp<[[VEC_TC]]> ; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]> ; CHECK-NEXT: Successor(s): ir-bb, scalar.ph @@ -1033,10 +1033,11 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph +; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<22> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i16 %for.1.lcssa = vp<[[FOR_RESULT]]> -; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1]]> +; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_P]]> ; CHECK-NEXT: } ; entry: From f028ab3f432d27c16b4f1036124ef4546a0371ea Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 9 Jul 2024 12:59:26 +0100 Subject: [PATCH 2/5] !fixup address latest comments, thansk! --- .../Transforms/Vectorize/LoopVectorize.cpp | 112 +++++++++--------- llvm/lib/Transforms/Vectorize/VPlan.cpp | 20 +--- llvm/lib/Transforms/Vectorize/VPlan.h | 32 ++--- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 28 +++-- .../Transforms/Vectorize/VPlanTransforms.cpp | 4 +- 5 files changed, 91 insertions(+), 105 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0580c66e429bb..35e1385dc8a74 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3290,17 +3290,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, if (EnableVPlanNativePath) fixNonInductionPHIs(Plan, State); - // At this point every instruction in the original loop is widened to a - // vector form. Note that fixing reduction phis, as well as extracting the - // exit and resume values for fixed-order recurrences are already modeled in - // VPlan. All that remains to do here is to create a phi in the scalar - // pre-header for each fixed-order recurrence resume value. - // TODO: Also model creating phis in the scalar pre-header in VPlan. - for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) { - if (!Legal->isFixedOrderRecurrence(LO->getPhi())) - continue; - } - // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); PSE.getSE()->forgetBlockAndLoopDispositions(); @@ -3337,8 +3326,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, VectorLoop->getHeader(), Plan, State); } - // Fix LCSSA phis not already fixed earlier. Extracts may need to be generated - // in the exit block, so update the builder. State.Builder.SetInsertPoint(State.CFG.ExitBB, State.CFG.ExitBB->getFirstNonPHIIt()); for (const auto &KV : Plan.getLiveOuts()) @@ -8485,9 +8472,59 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop, Value *IncomingValue = ExitPhi.getIncomingValueForBlock(ExitingBB); VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue, Plan); - Plan.addLiveOut( - &ExitPhi, V, - cast(Plan.getVectorLoopRegion()->getSingleSuccessor())); + Plan.addLiveOut(&ExitPhi, V); + } +} + +/// Feed a resume value for every FOR from the vector loop to the scalar loop, +/// if middle block branches to scalar preheader, by introducing ExtractFromEnd +/// and ResumePhi recipes in each, respectively, and a VPLiveOut which uses the +/// latter and corresponds to the scalar header. +static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) { + VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); + + // Start by finding out if middle block branches to scalar preheader. + // TODO: Should be replaced by + // Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the + // scalar region is modeled as well. + VPBasicBlock *ScalarPHVPBB = nullptr; + auto *MiddleVPBB = cast(VectorRegion->getSingleSuccessor()); + for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) { + if (isa(Succ)) + continue; + assert(!ScalarPHVPBB && "Two candidates for ScalarPHVPBB?"); + ScalarPHVPBB = cast(Succ); + } + if (!ScalarPHVPBB) + return; + + for (auto &H : VectorRegion->getEntryBasicBlock()->phis()) { + auto *FOR = dyn_cast(&H); + if (!FOR) + continue; + + VPBuilder B(ScalarPHVPBB); + VPBuilder MiddleBuilder(MiddleVPBB); + // Reset insert point so new recipes are inserted before terminator and + // condition, if there is either the former or both. + if (auto *Terminator = MiddleVPBB->getTerminator()) { + auto *Condition = dyn_cast(Terminator->getOperand(0)); + assert((!Condition || Condition->getParent() == MiddleVPBB) && + "Condition expected in MiddleVPBB"); + MiddleBuilder.setInsertPoint(Condition ? Condition : Terminator); + } + + // Extract the resume value and create a new VPLiveOut for it. + auto *Resume = MiddleBuilder.createNaryOp( + VPInstruction::ExtractFromEnd, + {FOR->getBackedgeValue(), + Plan.getOrAddLiveIn( + ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1))}, + {}, "vector.recur.extract"); + auto *ResumePhiRecipe = + B.createNaryOp(VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, + {}, "scalar.recur.init"); + Plan.addLiveOut(cast(FOR->getUnderlyingInstr()), ResumePhiRecipe); } } @@ -8655,48 +8692,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { "VPBasicBlock"); RecipeBuilder.fixHeaderPhis(); - auto *MiddleVPBB = - cast(Plan->getVectorLoopRegion()->getSingleSuccessor()); - - VPBasicBlock *ScalarPH = nullptr; - for (VPBlockBase *Succ : MiddleVPBB->getSuccessors()) { - auto *VPBB = dyn_cast(Succ); - if (VPBB && !isa(VPBB)) { - ScalarPH = VPBB; - break; - } - } - - if (ScalarPH) { - for (auto &H : HeaderVPBB->phis()) { - auto *FOR = dyn_cast(&H); - if (!FOR) - continue; - VPBuilder B(ScalarPH); - VPBuilder MiddleBuilder; - // Set insert point so new recipes are inserted before terminator and - // condition, if there is either the former or both. - if (MiddleVPBB->getNumSuccessors() != 2) - MiddleBuilder.setInsertPoint(MiddleVPBB); - else if (isa(MiddleVPBB->getTerminator()->getOperand(0))) - MiddleBuilder.setInsertPoint( - &*std::prev(MiddleVPBB->getTerminator()->getIterator())); - else - MiddleBuilder.setInsertPoint(MiddleVPBB->getTerminator()); - - // Extract the resume value and create a new VPLiveOut for it. - auto *Resume = MiddleBuilder.createNaryOp( - VPInstruction::ExtractFromEnd, - {FOR->getBackedgeValue(), - Plan->getOrAddLiveIn( - ConstantInt::get(Plan->getCanonicalIV()->getScalarType(), 1))}, - {}, "vector.recur.extract"); - auto *R = - B.createNaryOp(VPInstruction::ExitPhi, {Resume, FOR->getStartValue()}, - {}, "scalar.recur.init"); - Plan->addLiveOut(cast(FOR->getUnderlyingInstr()), R, ScalarPH); - } - } + addLiveOutsForFirstOrderRecurrences(*Plan); // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index d062ad0622497..1018a5e8c5707 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -915,11 +915,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, /// VPBB, if any, are rewired to the new VPIRBasicBlock. static void replaceVPBBWithIRVPBB(VPBasicBlock *VPBB, BasicBlock *IRBB) { VPIRBasicBlock *IRMiddleVPBB = new VPIRBasicBlock(IRBB); - for (auto &[_, LO] : VPBB->getPlan()->getLiveOuts()) { - if (LO->getPred() == VPBB) - LO->setPred(IRMiddleVPBB); - } - for (auto &R : make_early_inc_range(*VPBB)) R.moveBefore(*IRMiddleVPBB, IRMiddleVPBB->end()); VPBlockBase *PredVPBB = VPBB->getSinglePredecessor(); @@ -1135,9 +1130,9 @@ LLVM_DUMP_METHOD void VPlan::dump() const { print(dbgs()); } #endif -void VPlan::addLiveOut(PHINode *PN, VPValue *V, VPBasicBlock *Pred) { +void VPlan::addLiveOut(PHINode *PN, VPValue *V) { assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists"); - LiveOuts.insert({PN, new VPLiveOut(PN, V, Pred)}); + LiveOuts.insert({PN, new VPLiveOut(PN, V)}); } static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, @@ -1206,18 +1201,9 @@ VPlan *VPlan::duplicate() { remapOperands(Preheader, NewPreheader, Old2NewVPValues); remapOperands(Entry, NewEntry, Old2NewVPValues); - DenseMap Old2NewVPBlocks; - VPBlockBase *OldMiddle = getVectorLoopRegion()->getSingleSuccessor(); - VPBlockBase *NewMiddle = NewPlan->getVectorLoopRegion()->getSingleSuccessor(); - Old2NewVPBlocks[OldMiddle] = NewMiddle; - for (const auto &[Old, New] : - zip(OldMiddle->getSuccessors(), NewMiddle->getSuccessors())) - Old2NewVPBlocks[Old] = New; - // Clone live-outs. for (const auto &[_, LO] : LiveOuts) - NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)], - cast(Old2NewVPBlocks[LO->getPred()])); + NewPlan->addLiveOut(LO->getPhi(), Old2NewVPValues[LO->getOperand(0)]); // Initialize remaining fields of cloned VPlan. NewPlan->VFs = VFs; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 8d3c681cd903d..2ed16cca03cb6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -682,27 +682,23 @@ class VPBlockBase { }; /// A value that is used outside the VPlan. The operand of the user needs to be -/// added to the associated LCSSA phi node. +/// added to the associated LCSSA phi node. The incoming block from VPlan is +/// determined by where the VPValue is defined: if it is defined by a recipe +/// outside a region, its parent block is used, otherwise the middle block is +/// used. class VPLiveOut : public VPUser { PHINode *Phi; - /// Predecessor in VPlan of this live-out. Used to as block to set the - /// incoming value for. - VPBasicBlock *Pred; - public: - VPLiveOut(PHINode *Phi, VPValue *Op, VPBasicBlock *Pred) - : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi), Pred(Pred) {} + VPLiveOut(PHINode *Phi, VPValue *Op) + : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {} static inline bool classof(const VPUser *U) { return U->getVPUserID() == VPUser::VPUserID::LiveOut; } - /// Fixup the wrapped LCSSA phi node in the unique exit block. This simply - /// means we need to add the appropriate incoming value from the middle - /// block as exiting edges from the scalar epilogue loop (if present) are - /// already in place, and we exit the vector loop exclusively to the middle - /// block. + /// Fixup the wrapped LCSSA phi node. This means we need to add the + /// appropriate incoming value from the precessor Pred. void fixPhi(VPlan &Plan, VPTransformState &State); /// Returns true if the VPLiveOut uses scalars of operand \p Op. @@ -714,11 +710,6 @@ class VPLiveOut : public VPUser { PHINode *getPhi() const { return Phi; } - /// Returns to incoming block for which to set the value. - VPBasicBlock *getPred() const { return Pred; } - - void setPred(VPBasicBlock *Pred) { this->Pred = Pred; } - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the VPLiveOut to \p O. void print(raw_ostream &O, VPSlotTracker &SlotTracker) const; @@ -1202,8 +1193,9 @@ class VPInstruction : public VPRecipeWithIRFlags { ExplicitVectorLength, /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan. /// The first operand is the incoming value from the predecessor in VPlan, - /// the second operand is the incoming value for all other predecessors. - ExitPhi, + /// the second operand is the incoming value for all other predecessors + /// (which are currently not modeled in VPlan). + ResumePhi, CalculateTripCountMinusVF, // Increment the canonical IV separately for each unrolled part. CanonicalIVIncrementForPart, @@ -3346,7 +3338,7 @@ class VPlan { return cast(&*EntryVPBB->begin()); } - void addLiveOut(PHINode *PN, VPValue *V, VPBasicBlock *Pred); + void addLiveOut(PHINode *PN, VPValue *V); void removeLiveOut(PHINode *PN) { delete LiveOuts[PN]; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 6e757cd72f082..241676a535a14 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -193,7 +193,13 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { auto Lane = vputils::isUniformAfterVectorization(ExitValue) ? VPLane::getFirstLane() : VPLane::getLastLaneForVF(State.VF); - BasicBlock *PredBB = State.CFG.VPBB2IRBB[Pred]; + VPBasicBlock *PredVPBB = + cast(Plan.getVectorLoopRegion()->getSingleSuccessor()); + VPRecipeBase *DefRecipe = ExitValue->getDefiningRecipe(); + if (DefRecipe && !DefRecipe->getParent()->getParent()) + PredVPBB = DefRecipe->getParent(); + BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; + State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane)); if (Phi->getBasicBlockIndex(PredBB) != -1) Phi->setIncomingValueForBlock(PredBB, V); @@ -304,7 +310,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::PtrAdd: case VPInstruction::ExplicitVectorLength: - case VPInstruction::ExitPhi: + case VPInstruction::ResumePhi: return true; default: return false; @@ -595,20 +601,24 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true); return Builder.CreatePtrAdd(Ptr, Addend, Name); } - case VPInstruction::ExitPhi: { + case VPInstruction::ResumePhi: { if (Part != 0) return State.get(this, 0, /*IsScalar*/ true); Value *IncomingFromVPlanPred = State.get(getOperand(0), Part, /* IsScalar */ true); - Value *IncomingFromOtherPred = + Value *IncomingFromOtherPreds = State.get(getOperand(1), Part, /* IsScalar */ true); - auto *NewPhi = Builder.CreatePHI(IncomingFromOtherPred->getType(), 2, Name); + auto *NewPhi = + Builder.CreatePHI(IncomingFromOtherPreds->getType(), 2, Name); BasicBlock *VPlanPred = State.CFG .VPBB2IRBB[cast(getParent()->getSinglePredecessor())]; NewPhi->addIncoming(IncomingFromVPlanPred, VPlanPred); - for (auto *BB : predecessors(Builder.GetInsertBlock())) - NewPhi->addIncoming(IncomingFromOtherPred, BB); + for (auto *OtherPred : predecessors(Builder.GetInsertBlock())) { + assert(OtherPred != VPlanPred && + "VPlan predecessors should not be connected yet"); + NewPhi->addIncoming(IncomingFromOtherPreds, OtherPred); + } return NewPhi; } @@ -619,7 +629,7 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { bool VPInstruction::isVectorToScalar() const { return getOpcode() == VPInstruction::ExtractFromEnd || - getOpcode() == VPInstruction::ExitPhi || + getOpcode() == VPInstruction::ResumePhi || getOpcode() == VPInstruction::ComputeReductionResult; } @@ -749,7 +759,7 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ActiveLaneMask: O << "active lane mask"; break; - case VPInstruction::ExitPhi: + case VPInstruction::ResumePhi: O << "exit-phi"; break; case VPInstruction::ExplicitVectorLength: diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 5a773e8f35563..29837481dc389 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -937,7 +937,9 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, Type *IntTy = Plan.getCanonicalIV()->getScalarType(); // Extract the penultimate value of the recurrence and update VPLiveOut - // users of the recurrence splice. + // users of the recurrence splice. Note that the extract of the final value + // used to resume in the scalar loop is created earlier during VPlan + // construction. auto *Penultimate = cast(MiddleBuilder.createNaryOp( VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), From 3716289dd93817082c82ad37176d69bca990bab8 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 10 Jul 2024 16:14:14 +0100 Subject: [PATCH 3/5] !ifxup address latest comments, thanks! --- .../Transforms/Vectorize/LoopVectorize.cpp | 50 +++++++++---------- llvm/lib/Transforms/Vectorize/VPlan.h | 8 +-- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 15 ++++-- .../first-order-recurrence-chains-vplan.ll | 10 ++-- ...-order-recurrence-sink-replicate-region.ll | 12 ++--- .../interleave-and-scalarize-only.ll | 2 +- .../LoopVectorize/vplan-printing.ll | 2 +- 7 files changed, 54 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e4f4ac3ffafb6..57291bcd2e0a3 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3318,8 +3318,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, VectorLoop->getHeader(), Plan, State); } - State.Builder.SetInsertPoint(State.CFG.ExitBB, - State.CFG.ExitBB->getFirstNonPHIIt()); + // Fix LCSSA phis not already fixed earlier. for (const auto &KV : Plan.getLiveOuts()) KV.second->fixPhi(Plan, State); @@ -8687,7 +8686,9 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB, Loop *OrigLoop, static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) { VPRegionBlock *VectorRegion = Plan.getVectorLoopRegion(); - // Start by finding out if middle block branches to scalar preheader. + // Start by finding out if middle block branches to scalar preheader, which is + // not a VPIRBasicBlock, unlike Exit block - the other possible successor of + // middle block. // TODO: Should be replaced by // Plan->getScalarLoopRegion()->getSinglePredecessor() in the future once the // scalar region is modeled as well. @@ -8702,32 +8703,31 @@ static void addLiveOutsForFirstOrderRecurrences(VPlan &Plan) { if (!ScalarPHVPBB) return; - for (auto &H : VectorRegion->getEntryBasicBlock()->phis()) { - auto *FOR = dyn_cast(&H); + VPBuilder ScalarPHBuilder(ScalarPHVPBB); + VPBuilder MiddleBuilder(MiddleVPBB); + // Reset insert point so new recipes are inserted before terminator and + // condition, if there is either the former or both. + if (auto *Terminator = MiddleVPBB->getTerminator()) { + auto *Condition = dyn_cast(Terminator->getOperand(0)); + assert((!Condition || Condition->getParent() == MiddleVPBB) && + "Condition expected in MiddleVPBB"); + MiddleBuilder.setInsertPoint(Condition ? Condition : Terminator); + } + VPValue *OneVPV = Plan.getOrAddLiveIn( + ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); + + for (auto &HeaderPhi : VectorRegion->getEntryBasicBlock()->phis()) { + auto *FOR = dyn_cast(&HeaderPhi); if (!FOR) continue; - VPBuilder B(ScalarPHVPBB); - VPBuilder MiddleBuilder(MiddleVPBB); - // Reset insert point so new recipes are inserted before terminator and - // condition, if there is either the former or both. - if (auto *Terminator = MiddleVPBB->getTerminator()) { - auto *Condition = dyn_cast(Terminator->getOperand(0)); - assert((!Condition || Condition->getParent() == MiddleVPBB) && - "Condition expected in MiddleVPBB"); - MiddleBuilder.setInsertPoint(Condition ? Condition : Terminator); - } - // Extract the resume value and create a new VPLiveOut for it. - auto *Resume = MiddleBuilder.createNaryOp( - VPInstruction::ExtractFromEnd, - {FOR->getBackedgeValue(), - Plan.getOrAddLiveIn( - ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1))}, - {}, "vector.recur.extract"); - auto *ResumePhiRecipe = - B.createNaryOp(VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, - {}, "scalar.recur.init"); + auto *Resume = MiddleBuilder.createNaryOp(VPInstruction::ExtractFromEnd, + {FOR->getBackedgeValue(), OneVPV}, + {}, "vector.recur.extract"); + auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp( + VPInstruction::ResumePhi, {Resume, FOR->getStartValue()}, {}, + "scalar.recur.init"); Plan.addLiveOut(cast(FOR->getUnderlyingInstr()), ResumePhiRecipe); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index ba0e88f872a1d..862a52a8c72a6 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -697,7 +697,7 @@ class VPBlockBase { }; /// A value that is used outside the VPlan. The operand of the user needs to be -/// added to the associated LCSSA phi node. The incoming block from VPlan is +/// added to the associated phi node. The incoming block from VPlan is /// determined by where the VPValue is defined: if it is defined by a recipe /// outside a region, its parent block is used, otherwise the middle block is /// used. @@ -712,8 +712,10 @@ class VPLiveOut : public VPUser { return U->getVPUserID() == VPUser::VPUserID::LiveOut; } - /// Fixup the wrapped LCSSA phi node. This means we need to add the - /// appropriate incoming value from the precessor Pred. + /// Fix the wrapped phi node. This means adding an incoming value to exit + /// block phi's from the vector loop via middle block (values from scalar loop + /// already reach these phi's), and updating the value to scalar header phi's + /// from the scalar preheader. void fixPhi(VPlan &Plan, VPTransformState &State); /// Returns true if the VPLiveOut uses scalars of operand \p Op. diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 788fbcadece33..864743761e25b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -194,12 +194,18 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { auto Lane = vputils::isUniformAfterVectorization(ExitValue) ? VPLane::getFirstLane() : VPLane::getLastLaneForVF(State.VF); - VPBasicBlock *PredVPBB = + VPBasicBlock *MiddleVPBB = cast(Plan.getVectorLoopRegion()->getSingleSuccessor()); VPRecipeBase *DefRecipe = ExitValue->getDefiningRecipe(); - if (DefRecipe && !DefRecipe->getParent()->getParent()) - PredVPBB = DefRecipe->getParent(); + auto *ExitingVPBB = DefRecipe ? DefRecipe->getParent() : nullptr; + // Values leaving the vector loop reach live out phi's in the exiting block + // via middle block. + auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion() + ? MiddleVPBB + : ExitingVPBB; BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; + // Set insertion point in PredBB in case an extract needs to be generated. + // TODO: Model extracts explicitly. State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); Value *V = State.get(ExitValue, VPIteration(State.UF - 1, Lane)); if (Phi->getBasicBlockIndex(PredBB) != -1) @@ -751,6 +757,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::BranchOnCount: case VPInstruction::BranchOnCond: + case VPInstruction::ResumePhi: return true; }; llvm_unreachable("switch should return"); @@ -804,7 +811,7 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, O << "active lane mask"; break; case VPInstruction::ResumePhi: - O << "exit-phi"; + O << "resume-phi"; break; case VPInstruction::ExplicitVectorLength: O << "EXPLICIT-VECTOR-LENGTH"; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index 55c49517ef8fa..66a50f6e3f373 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -43,8 +43,8 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<22> -; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]> = exit-phi vp<[[RESUME_2]]>, ir<33> +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> +; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]> = resume-phi vp<[[RESUME_2]]>, ir<33> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1_P]]> @@ -115,9 +115,9 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<22> -; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]> = exit-phi vp<[[RESUME_2]]>, ir<33> -; CHECK-NEXT: EMIT vp<[[RESUME_3_P:%.*]]> = exit-phi vp<[[RESUME_3]]>, ir<33> +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> +; CHECK-NEXT: EMIT vp<[[RESUME_2_P:%.*]]> = resume-phi vp<[[RESUME_2]]>, ir<33> +; CHECK-NEXT: EMIT vp<[[RESUME_3_P:%.*]]> = resume-phi vp<[[RESUME_3]]>, ir<33> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1_P]]> diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 3412527ad4b77..bff730f263ab2 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -81,7 +81,7 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1_P]]> @@ -164,7 +164,7 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]> @@ -246,7 +246,7 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %res = vp<[[RED_RES]]> @@ -354,7 +354,7 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1_P]]> @@ -448,7 +448,7 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]> @@ -531,7 +531,7 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> +; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %.pn = vp<[[RESUME_1_P]]> diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 9456732fceee7..7ed42edfc7753 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -219,7 +219,7 @@ exit: ; DBG-NEXT: No successors ; DBG-EMPTY: ; DBG-NEXT: scalar.ph: -; DBG-NEXT: EMIT vp<[[RESUME_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<0> +; DBG-NEXT: EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0> ; DBG-NEXT: No successors ; DBG-EMPTY: ; DBG-NEXT: Live-out i32 %for = vp<[[RESUME_P]]> diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 260a4dd1e095d..3a664de748d2d 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -1033,7 +1033,7 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: scalar.ph -; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = exit-phi vp<[[RESUME_1]]>, ir<22> +; CHECK-NEXT: EMIT vp<[[RESUME_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<22> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i16 %for.1.lcssa = vp<[[FOR_RESULT]]> From 24133b78915c19ab3a5b90ab9c8692421cf6e857 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 11 Jul 2024 12:49:57 +0100 Subject: [PATCH 4/5] !fixup address latest comments, thanks! --- .../Transforms/Vectorize/LoopVectorize.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlan.h | 6 +++++- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 20 ++++++++++--------- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 57291bcd2e0a3..2059ab31a5889 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3318,7 +3318,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, VectorLoop->getHeader(), Plan, State); } - // Fix LCSSA phis not already fixed earlier. + // Fix live-out phis not already fixed earlier. for (const auto &KV : Plan.getLiveOuts()) KV.second->fixPhi(Plan, State); diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 862a52a8c72a6..e3b78700e1018 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1392,6 +1392,10 @@ class VPInstruction : public VPRecipeWithIRFlags { /// Returns true if this VPInstruction produces a scalar value from a vector, /// e.g. by performing a reduction or extracting a lane. bool isVectorToScalar() const; + + /// Returns true if this VPInstruction's operands are single scalars and the + /// result is also a single scalar. + bool isSingleScalar() const; }; /// VPWidenRecipe is a recipe for producing a copy of vector type its @@ -3743,7 +3747,7 @@ inline bool isUniformAfterVectorization(VPValue *VPV) { if (auto *GEP = dyn_cast(Def)) return all_of(GEP->operands(), isUniformAfterVectorization); if (auto *VPI = dyn_cast(Def)) - return VPI->isVectorToScalar(); + return VPI->isSingleScalar() || VPI->isVectorToScalar(); return false; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 864743761e25b..25279ad837c3e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -196,8 +196,8 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { : VPLane::getLastLaneForVF(State.VF); VPBasicBlock *MiddleVPBB = cast(Plan.getVectorLoopRegion()->getSingleSuccessor()); - VPRecipeBase *DefRecipe = ExitValue->getDefiningRecipe(); - auto *ExitingVPBB = DefRecipe ? DefRecipe->getParent() : nullptr; + VPRecipeBase *ExitingRecipe = ExitValue->getDefiningRecipe(); + auto *ExitingVPBB = ExitingRecipe ? ExitingRecipe->getParent() : nullptr; // Values leaving the vector loop reach live out phi's in the exiting block // via middle block. auto *PredVPBB = !ExitingVPBB || ExitingVPBB->getEnclosingLoopRegion() @@ -350,7 +350,7 @@ bool VPInstruction::doesGeneratePerAllLanes() const { bool VPInstruction::canGenerateScalarForFirstLane() const { if (Instruction::isBinaryOp(getOpcode())) return true; - if (isVectorToScalar()) + if (isScalar() || isVectorToScalar()) return true; switch (Opcode) { case Instruction::ICmp: @@ -360,7 +360,6 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::PtrAdd: case VPInstruction::ExplicitVectorLength: - case VPInstruction::ResumePhi: return true; default: return false; @@ -679,10 +678,13 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { bool VPInstruction::isVectorToScalar() const { return getOpcode() == VPInstruction::ExtractFromEnd || - getOpcode() == VPInstruction::ResumePhi || getOpcode() == VPInstruction::ComputeReductionResult; } +bool VPInstruction::isSingleScalar() const { + return getOpcode() == VPInstruction::ResumePhi; +} + #if !defined(NDEBUG) bool VPInstruction::isFPMathOp() const { // Inspired by FPMathOperator::classof. Notable differences are that we don't @@ -703,9 +705,9 @@ void VPInstruction::execute(VPTransformState &State) { if (hasFastMathFlags()) State.Builder.setFastMathFlags(getFastMathFlags()); State.setDebugLocFrom(getDebugLoc()); - bool GeneratesPerFirstLaneOnly = - canGenerateScalarForFirstLane() && - (vputils::onlyFirstLaneUsed(this) || isVectorToScalar()); + bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() && + (vputils::onlyFirstLaneUsed(this) || + isVectorToScalar() || isSingleScalar()); bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); bool OnlyFirstPartUsed = vputils::onlyFirstPartUsed(this); for (unsigned Part = 0; Part < State.UF; ++Part) { @@ -732,7 +734,7 @@ void VPInstruction::execute(VPTransformState &State) { assert(GeneratedValue && "generatePerPart must produce a value"); assert((GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly || - State.VF.isScalar()) && + State.VF.isSingleScalar()) && "scalar value but not only first lane defined"); State.set(this, GeneratedValue, Part, /*IsScalar*/ GeneratesPerFirstLaneOnly); From 5e61cb4d5d816ba1910f913f681ccd8196433e88 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 11 Jul 2024 14:27:04 +0100 Subject: [PATCH 5/5] !fixup fix incorrect renames --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 25279ad837c3e..53d91ee27b73f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -350,7 +350,7 @@ bool VPInstruction::doesGeneratePerAllLanes() const { bool VPInstruction::canGenerateScalarForFirstLane() const { if (Instruction::isBinaryOp(getOpcode())) return true; - if (isScalar() || isVectorToScalar()) + if (isSingleScalar() || isVectorToScalar()) return true; switch (Opcode) { case Instruction::ICmp: @@ -734,7 +734,7 @@ void VPInstruction::execute(VPTransformState &State) { assert(GeneratedValue && "generatePerPart must produce a value"); assert((GeneratedValue->getType()->isVectorTy() == !GeneratesPerFirstLaneOnly || - State.VF.isSingleScalar()) && + State.VF.isScalar()) && "scalar value but not only first lane defined"); State.set(this, GeneratedValue, Part, /*IsScalar*/ GeneratesPerFirstLaneOnly);